0
0
Fork 0

Optimize x64 loads and stores using complex addressing modes (#972)

* Optimize x64 loads and stores using complex addressing modes

* This was meant to be used for testing
This commit is contained in:
gdkchan 2020-03-09 19:29:34 -03:00 committed by GitHub
parent e2bb5e8091
commit 61d79facd1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 538 additions and 86 deletions

View file

@ -1,8 +1,6 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace ARMeilleure.CodeGen.Optimizations
{
@ -60,6 +58,36 @@ namespace ARMeilleure.CodeGen.Optimizations
while (modified);
}
// Sweeps every block of the graph and removes each node that IsUnused reports
// as unused. The sweep is repeated until a complete pass removes nothing.
public static void RemoveUnusedNodes(ControlFlowGraph cfg)
{
    // Primed with true so that at least one sweep always runs.
    bool removedAny = true;

    while (removedAny)
    {
        removedAny = false;

        BasicBlock current = cfg.Blocks.First;

        while (current != null)
        {
            Node following;

            for (Node candidate = current.Operations.First; candidate != null; candidate = following)
            {
                // The successor is read before the candidate can be removed from the list.
                following = candidate.ListNext;

                if (!IsUnused(candidate))
                {
                    continue;
                }

                RemoveNode(current, candidate);

                removedAny = true;
            }

            current = current.ListNext;
        }
    }
}
private static void PropagateCopy(Operation copyOp)
{
// Propagate copy source operand to all uses of the destination operand.

View file

@ -110,6 +110,20 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index);
}
else if (source.Kind == OperandKind.Memory)
{
MemoryOperand memOp = (MemoryOperand)source;
if (memOp.BaseAddress != null)
{
locInfo[memOp.BaseAddress.AsInt32() - 1].SetBlockIndex(block.Index);
}
if (memOp.Index != null)
{
locInfo[memOp.Index.AsInt32() - 1].SetBlockIndex(block.Index);
}
}
}
for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
@ -181,15 +195,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
int intLocalUse = 0;
int vecLocalUse = 0;
for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
void AllocateRegister(Operand source, MemoryOperand memOp, int srcIndex)
{
Operand source = node.GetSource(srcIndex);
if (source.Kind != OperandKind.LocalVariable)
{
continue;
}
LocalInfo info = locInfo[source.AsInt32() - 1];
info.UseCount++;
@ -198,7 +205,23 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (info.Register != -1)
{
node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type));
Operand reg = Register(info.Register, source.Type.ToRegisterType(), source.Type);
if (memOp != null)
{
if (srcIndex == 0)
{
memOp.BaseAddress = reg;
}
else /* if (srcIndex == 1) */
{
memOp.Index = reg;
}
}
else
{
node.SetSource(srcIndex, reg);
}
if (info.UseCount == info.Uses && !info.PreAllocated)
{
@ -226,7 +249,21 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
info.Temp = temp;
}
if (memOp != null)
{
if (srcIndex == 0)
{
memOp.BaseAddress = temp;
}
else /* if (srcIndex == 1) */
{
memOp.Index = temp;
}
}
else
{
node.SetSource(srcIndex, temp);
}
Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset));
@ -234,6 +271,30 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
{
Operand source = node.GetSource(srcIndex);
if (source.Kind == OperandKind.LocalVariable)
{
AllocateRegister(source, null, srcIndex);
}
else if (source.Kind == OperandKind.Memory)
{
MemoryOperand memOp = (MemoryOperand)source;
if (memOp.BaseAddress != null)
{
AllocateRegister(memOp.BaseAddress, memOp, 0);
}
if (memOp.Index != null)
{
AllocateRegister(memOp.Index, memOp, 1);
}
}
}
int intLocalAsg = 0;
int vecLocalAsg = 0;

View file

@ -711,6 +711,20 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
operation.SetSource(index, register);
}
else if (source.Kind == OperandKind.Memory)
{
MemoryOperand memOp = (MemoryOperand)source;
if (memOp.BaseAddress == current.Local)
{
memOp.BaseAddress = register;
}
if (memOp.Index == current.Local)
{
memOp.Index = register;
}
}
}
for (int index = 0; index < operation.DestinationsCount; index++)
@ -1011,6 +1025,20 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
yield return source;
}
else if (source.Kind == OperandKind.Memory)
{
MemoryOperand memOp = (MemoryOperand)source;
if (memOp.BaseAddress != null)
{
yield return memOp.BaseAddress;
}
if (memOp.Index != null)
{
yield return memOp.Index;
}
}
}
}

View file

@ -14,6 +14,8 @@ namespace ARMeilleure.CodeGen.X86
private const byte RexWPrefix = 0x48;
private const byte LockPrefix = 0xf0;
private const int MaxRegNumber = 15;
[Flags]
private enum InstructionFlags
{
@ -842,10 +844,7 @@ namespace ARMeilleure.CodeGen.X86
{
X86Register shiftReg = (X86Register)source.GetRegister().Index;
if (shiftReg != X86Register.Rcx)
{
throw new ArgumentException($"Invalid shift register \"{shiftReg}\".");
}
Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\".");
source = null;
}
@ -1080,6 +1079,8 @@ namespace ARMeilleure.CodeGen.X86
if (baseReg.Index >= 8)
{
Debug.Assert((uint)baseReg.Index <= MaxRegNumber);
rexPrefix |= RexPrefix | (baseReg.Index >> 3);
}
@ -1091,13 +1092,12 @@ namespace ARMeilleure.CodeGen.X86
{
int indexReg = memOp.Index.GetRegister().Index;
if (indexReg == (int)X86Register.Rsp)
{
throw new ArgumentException("Using RSP as index register on the memory operand is not allowed.");
}
Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed.");
if (indexReg >= 8)
{
Debug.Assert((uint)indexReg <= MaxRegNumber);
rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
}

View file

@ -0,0 +1,19 @@
using ARMeilleure.IntermediateRepresentation;
namespace ARMeilleure.CodeGen.X86
{
// Helpers for checking whether a constant can be encoded as a signed 32-bits immediate.
static class CodeGenCommon
{
    // Returns true when the value of the operand does not fit on a signed 32-bits integer.
    // The value is read with AsInt32 for I32 operands, and with AsInt64 for any other type.
    public static bool IsLongConst(Operand op)
    {
        long value;

        if (op.Type == OperandType.I32)
        {
            value = op.AsInt32();
        }
        else
        {
            value = op.AsInt64();
        }

        return !ConstFitsOnS32(value);
    }

    // Returns true when the value lies inside the range of a signed 32-bits integer.
    private static bool ConstFitsOnS32(long value)
    {
        return value >= int.MinValue && value <= int.MaxValue;
    }
}
}

View file

@ -109,6 +109,8 @@ namespace ARMeilleure.CodeGen.X86
Optimizer.RunPass(cfg);
}
X86Optimizer.RunPass(cfg);
Logger.EndPass(PassName.Optimization, cfg);
Logger.StartPass(PassName.PreAllocation);

View file

@ -184,7 +184,7 @@ namespace ARMeilleure.CodeGen.X86
operation.SetSource(1, src2);
}
else if (!HasConstSrc2(inst) || IsLongConst(src2))
else if (!HasConstSrc2(inst) || CodeGenCommon.IsLongConst(src2))
{
src2 = AddCopy(nodes, node, src2);
@ -1046,7 +1046,7 @@ namespace ARMeilleure.CodeGen.X86
nodes.AddBefore(node, retCopyOp);
}
operation.SetSources(new Operand[0]);
operation.SetSources(System.Array.Empty<Operand>());
}
private static void HandleReturnSystemVAbi(IntrusiveList<Node> nodes, Node node, Operation operation)
@ -1116,20 +1116,6 @@ namespace ARMeilleure.CodeGen.X86
return value;
}
// Returns true when the value of the operand does not fit on a signed 32-bits integer.
// The value is read with AsInt32 for I32 operands, and with AsInt64 for any other type.
private static bool IsLongConst(Operand operand)
{
    long value;

    if (operand.Type == OperandType.I32)
    {
        value = operand.AsInt32();
    }
    else
    {
        value = operand.AsInt64();
    }

    return !ConstFitsOnS32(value);
}
// Returns true when the value lies inside the range of a signed 32-bits integer.
private static bool ConstFitsOnS32(long value)
{
    return value >= int.MinValue && value <= int.MaxValue;
}
private static void Delete(IntrusiveList<Node> nodes, Node node, Operation operation)
{
operation.Destination = null;

View file

@ -0,0 +1,251 @@
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.CodeGen.X86
{
static class X86Optimizer
{
// Walks every operation of every block of the graph and applies two rewrites:
// - On Add, each constant source that CodeGenCommon.IsLongConst reports as too wide
//   is moved into a Copy placed right before the Add.
// - On loads and stores, the address source is replaced by a memory operand whenever
//   GetMemoryOperandOrNull manages to build one.
// Once all blocks were visited, unused nodes are removed with Optimizer.RemoveUnusedNodes.
public static void RunPass(ControlFlowGraph cfg)
{
    BasicBlock block = cfg.Blocks.First;

    while (block != null)
    {
        Node node = block.Operations.First;

        while (node != null)
        {
            // The successor is read up front, before the node is processed.
            Node following = node.ListNext;

            if (node is Operation operation)
            {
                // Insert copies for constants that can't fit on a 32-bits immediate.
                // Doing this early unblocks a few optimizations.
                if (operation.Instruction == Instruction.Add)
                {
                    for (int srcIndex = 0; srcIndex < 2; srcIndex++)
                    {
                        Operand source = operation.GetSource(srcIndex);

                        if (source.Kind == OperandKind.Constant && CodeGenCommon.IsLongConst(source))
                        {
                            Operand temp = Local(source.Type);

                            Operation copyOp = new Operation(Instruction.Copy, temp, source);

                            block.Operations.AddBefore(operation, copyOp);

                            operation.SetSource(srcIndex, temp);
                        }
                    }
                }

                // Try to fold something like:
                //  shl rbx, 2
                //  add rax, rbx
                //  add rax, 0xcafe
                //  mov rax, [rax]
                // Into:
                //  mov rax, [rax+rbx*4+0xcafe]
                if (IsMemoryLoadOrStore(operation.Instruction))
                {
                    // With a destination, the destination type is used for the memory operand.
                    // Without one, the type comes from the second source.
                    OperandType accessType = operation.Destination != null
                        ? operation.Destination.Type
                        : operation.GetSource(1).Type;

                    MemoryOperand folded = GetMemoryOperandOrNull(operation.GetSource(0), accessType);

                    if (folded != null)
                    {
                        operation.SetSource(0, folded);
                    }
                }
            }

            node = following;
        }

        block = block.ListNext;
    }

    Optimizer.RemoveUnusedNodes(cfg);
}
// Tries to express the address as a memory operand made of a base, an optional index
// with scale, and a 32-bits displacement. Returns null when neither GetConstOp nor
// GetIndexOp changed the base, which means that nothing could be folded.
private static MemoryOperand GetMemoryOperandOrNull(Operand addr, OperandType type)
{
    Operand baseOp = addr;

    // Step 1: peel an addition of a local with a 32-bits immediate, if the address is
    // the result of one. The local becomes the base and the immediate becomes the
    // displacement. When that is not the case, the base is still the address and
    // the displacement is zero.
    int displacement = GetConstOp(ref baseOp);

    // Step 2: peel an addition of two locals, if the current base is the result of one.
    // One local becomes the base and the other the index. If one of them is itself
    // a left shift of a local by a constant <= 3, the shifted local becomes the index
    // and the shift amount becomes the scale of the memory operand.
    // There is one missed case, which is the address being a shift result, but this is
    // probably not worth optimizing as it should never happen.
    (Operand, Multiplier) indexAndScale = GetIndexOp(ref baseOp);

    // A base that still is the address means that none of the steps above matched.
    return baseOp == addr
        ? null
        : new MemoryOperand(type, baseOp, indexAndScale.Item1, indexAndScale.Item2, displacement);
}
// Checks whether baseOp is the single-assignment result of an Add between a local and
// a constant that fits on 32-bits. If it is, baseOp is replaced by the local and the
// constant is returned. Otherwise baseOp is left untouched and zero is returned.
private static int GetConstOp(ref Operand baseOp)
{
    Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);

    if (addOp == null)
    {
        return 0;
    }

    Operand lhs = addOp.GetSource(0);
    Operand rhs = addOp.GetSource(1);

    // The constant is accepted on either side, as long as the other side is a local.
    bool constOnLeft  = lhs.Kind == OperandKind.Constant      && rhs.Kind == OperandKind.LocalVariable;
    bool constOnRight = lhs.Kind == OperandKind.LocalVariable && rhs.Kind == OperandKind.Constant;

    if (!constOnLeft && !constOnRight)
    {
        return 0;
    }

    Operand immediate = constOnLeft ? lhs : rhs;
    Operand remainder = constOnLeft ? rhs : lhs;

    // If we have addition by 64-bits constant, then we can't optimize it further,
    // as we can't encode a 64-bits immediate on the memory operand.
    if (CodeGenCommon.IsLongConst(immediate))
    {
        return 0;
    }

    baseOp = remainder;

    return immediate.AsInt32();
}
// Checks whether baseOp is the single-assignment result of an Add between two locals.
// If it is not, baseOp is left untouched and (null, x1) is returned.
// If it is, baseOp becomes the first source and the second source is returned as index
// with scale x1, unless one of the sources is the result of a left shift of a local by
// a constant <= 3. In that case the shifted local is returned as index, the shift
// amount selects the scale, and baseOp becomes the other source of the Add.
private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp)
{
    Operation sum = GetAsgOpWithInst(baseOp, Instruction.Add);

    if (sum == null)
    {
        return (null, Multiplier.x1);
    }

    Operand lhs = sum.GetSource(0);
    Operand rhs = sum.GetSource(1);

    if (!(lhs.Kind == OperandKind.LocalVariable && rhs.Kind == OperandKind.LocalVariable))
    {
        return (null, Multiplier.x1);
    }

    // Without a usable shift, the sources are taken in the order they show up.
    baseOp = lhs;

    Operand    index  = rhs;
    Multiplier factor = Multiplier.x1;

    // The first source is checked first. The second one is only looked at
    // when the first is not the result of a left shift at all.
    Operation shlOp = GetAsgOpWithInst(lhs, Instruction.ShiftLeft);

    bool shiftOnLhs = shlOp != null;

    if (!shiftOnLhs)
    {
        shlOp = GetAsgOpWithInst(rhs, Instruction.ShiftLeft);
    }

    if (shlOp == null)
    {
        return (index, factor);
    }

    Operand shifted = shlOp.GetSource(0);
    Operand amount  = shlOp.GetSource(1);

    if (shifted.Kind == OperandKind.LocalVariable && amount.Kind == OperandKind.Constant && amount.Value <= 3)
    {
        switch (amount.Value)
        {
            case 1:
                factor = Multiplier.x2;
                break;

            case 2:
                factor = Multiplier.x4;
                break;

            case 3:
                factor = Multiplier.x8;
                break;

            default:
                factor = Multiplier.x1;
                break;
        }

        // The source that is not the shift result is the one that stays as base.
        baseOp = shiftOnLhs ? rhs : lhs;
        index  = shifted;
    }

    return (index, factor);
}
// Returns the operation that assigns the operand, as long as the operand has exactly
// one assignment, that assignment is an Operation, and its instruction is the one
// requested. Returns null in all other cases.
private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
{
    // If we have multiple assignments, folding is not safe
    // as the value may be different depending on the
    // control flow path.
    if (op.Assignments.Count == 1 &&
        op.Assignments[0] is Operation candidate &&
        candidate.Instruction == inst)
    {
        return candidate;
    }

    return null;
}
// Returns true for the Load, Load16, Load8, Store, Store16 and Store8 instructions,
// and false for any other instruction.
private static bool IsMemoryLoadOrStore(Instruction inst)
{
    switch (inst)
    {
        case Instruction.Load:
        case Instruction.Load16:
        case Instruction.Load8:
        case Instruction.Store:
        case Instruction.Store16:
        case Instruction.Store8:
            return true;

        default:
            return false;
    }
}
}
}

View file

@ -58,34 +58,18 @@ namespace ARMeilleure.IntermediateRepresentation
public void SetDestination(int index, Operand destination)
{
Operand oldOp = _destinations[index];
RemoveAssignment(_destinations[index]);
if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
{
oldOp.Assignments.Remove(this);
}
if (destination != null && destination.Kind == OperandKind.LocalVariable)
{
destination.Assignments.Add(this);
}
AddAssignment(destination);
_destinations[index] = destination;
}
public void SetSource(int index, Operand source)
{
Operand oldOp = _sources[index];
RemoveUse(_sources[index]);
if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
{
oldOp.Uses.Remove(this);
}
if (source != null && source.Kind == OperandKind.LocalVariable)
{
source.Uses.Add(this);
}
AddUse(source);
_sources[index] = source;
}
@ -96,12 +80,7 @@ namespace ARMeilleure.IntermediateRepresentation
{
for (int index = 0; index < _destinations.Length; index++)
{
Operand oldOp = _destinations[index];
if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
{
oldOp.Assignments.Remove(this);
}
RemoveAssignment(_destinations[index]);
}
_destinations = destinations;
@ -117,10 +96,7 @@ namespace ARMeilleure.IntermediateRepresentation
_destinations[index] = newOp;
if (newOp.Kind == OperandKind.LocalVariable)
{
newOp.Assignments.Add(this);
}
AddAssignment(newOp);
}
}
@ -128,12 +104,7 @@ namespace ARMeilleure.IntermediateRepresentation
{
for (int index = 0; index < _sources.Length; index++)
{
Operand oldOp = _sources[index];
if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
{
oldOp.Uses.Remove(this);
}
RemoveUse(_sources[index]);
}
_sources = new Operand[sources.Length];
@ -144,9 +115,114 @@ namespace ARMeilleure.IntermediateRepresentation
_sources[index] = newOp;
if (newOp.Kind == OperandKind.LocalVariable)
AddUse(newOp);
}
}
private void AddAssignment(Operand op)
{
newOp.Uses.Add(this);
if (op == null)
{
return;
}
if (op.Kind == OperandKind.LocalVariable)
{
op.Assignments.Add(this);
}
else if (op.Kind == OperandKind.Memory)
{
MemoryOperand memOp = (MemoryOperand)op;
if (memOp.BaseAddress != null)
{
memOp.BaseAddress.Assignments.Add(this);
}
if (memOp.Index != null)
{
memOp.Index.Assignments.Add(this);
}
}
}
// Removes this node from the assignment list of the operand. For a local variable the
// list of the operand itself is updated. For a memory operand, the lists of its base
// address and index are updated instead, when they are present.
// A null operand, or an operand of any other kind, is ignored.
private void RemoveAssignment(Operand op)
{
    if (op == null)
    {
        return;
    }

    switch (op.Kind)
    {
        case OperandKind.LocalVariable:
        {
            op.Assignments.Remove(this);

            break;
        }

        case OperandKind.Memory:
        {
            MemoryOperand memory = (MemoryOperand)op;

            if (memory.BaseAddress != null)
            {
                memory.BaseAddress.Assignments.Remove(this);
            }

            if (memory.Index != null)
            {
                memory.Index.Assignments.Remove(this);
            }

            break;
        }
    }
}
// Adds this node to the use list of the operand. For a local variable the list of
// the operand itself is updated. For a memory operand, the lists of its base address
// and index are updated instead, when they are present.
// A null operand, or an operand of any other kind, is ignored.
private void AddUse(Operand op)
{
    if (op == null)
    {
        return;
    }

    switch (op.Kind)
    {
        case OperandKind.LocalVariable:
        {
            op.Uses.Add(this);

            break;
        }

        case OperandKind.Memory:
        {
            MemoryOperand memory = (MemoryOperand)op;

            if (memory.BaseAddress != null)
            {
                memory.BaseAddress.Uses.Add(this);
            }

            if (memory.Index != null)
            {
                memory.Index.Uses.Add(this);
            }

            break;
        }
    }
}
// Removes this node from the use list of the operand. For a local variable the list
// of the operand itself is updated. For a memory operand, the lists of its base
// address and index are updated instead, when they are present.
// A null operand, or an operand of any other kind, is ignored.
private void RemoveUse(Operand op)
{
    if (op == null)
    {
        return;
    }

    switch (op.Kind)
    {
        case OperandKind.LocalVariable:
        {
            op.Uses.Remove(this);

            break;
        }

        case OperandKind.Memory:
        {
            MemoryOperand memory = (MemoryOperand)op;

            if (memory.BaseAddress != null)
            {
                memory.BaseAddress.Uses.Remove(this);
            }

            if (memory.Index != null)
            {
                memory.Index.Uses.Remove(this);
            }

            break;
        }
    }
}

View file

@ -9,11 +9,16 @@ namespace ARMeilleure.Translation
{
static class Compiler
{
public static T Compile<T>(
ControlFlowGraph cfg,
OperandType[] funcArgTypes,
OperandType funcReturnType,
CompilerOptions options)
// Compiles the graph with CompileAndGetCf, maps the compiled function with JitCache.Map
// and returns a delegate of type T for the pointer that the mapping produced.
public static T Compile<T>(ControlFlowGraph cfg, OperandType[] argTypes, OperandType retType, CompilerOptions options)
{
    IntPtr mappedCode = JitCache.Map(CompileAndGetCf(cfg, argTypes, retType, options));

    return Marshal.GetDelegateForFunctionPointer<T>(mappedCode);
}
public static CompiledFunction CompileAndGetCf(ControlFlowGraph cfg, OperandType[] argTypes, OperandType retType, CompilerOptions options)
{
Logger.StartPass(PassName.Dominance);
@ -35,13 +40,9 @@ namespace ARMeilleure.Translation
Logger.EndPass(PassName.SsaConstruction, cfg);
CompilerContext cctx = new CompilerContext(cfg, funcArgTypes, funcReturnType, options);
CompilerContext cctx = new CompilerContext(cfg, argTypes, retType, options);
CompiledFunction func = CodeGenerator.Generate(cctx);
IntPtr codePtr = JitCache.Map(func);
return Marshal.GetDelegateForFunctionPointer<T>(codePtr);
return CodeGenerator.Generate(cctx);
}
}
}