0
0
Fork 0
mirror of https://github.com/ryujinx-mirror/ryujinx.git synced 2024-12-23 06:25:45 +00:00

Misc. CPU optimizations (#575)

* Add optimizations related to caller/callee saved registers, thread synchronization and disable tier 0

* Refactoring

* Add a config entry to enable or disable the reg load/store opt.

* Remove unnecessary register state stores for calls when the callee is known

* Rename IoType to VarType

* Enable tier 0 while fixing some perf issues related to tier 0

* Small tweak -- Compile before adding to the cache, to avoid lags

* Add required config entry
This commit is contained in:
gdkchan 2019-02-27 23:03:31 -03:00 committed by jduncanator
parent 884b4e5fd3
commit e21ebbf666
28 changed files with 456 additions and 280 deletions

View file

@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I(op.Position + 4); context.EmitLdc_I(op.Position + 4);
context.EmitStint(RegisterAlias.Lr); context.EmitStint(RegisterAlias.Lr);
context.EmitStoreState();
EmitCall(context, op.Imm); EmitCall(context, op.Imm);
} }
@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions
{ {
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp; OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
context.HasIndirectJump = true;
context.EmitStoreState(); context.EmitStoreState();
context.EmitLdintzr(op.Rn); context.EmitLdintzr(op.Rn);

View file

@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions
} }
context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr)); context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr));
context.EmitStoreState();
//If x is true, then this is a branch with link and exchange. //If x is true, then this is a branch with link and exchange.
//In this case we need to swap the mode between Arm <-> Thumb. //In this case we need to swap the mode between Arm <-> Thumb.

View file

@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
{ {
if (context.Tier == TranslationTier.Tier0) if (context.Tier == TranslationTier.Tier0)
{ {
context.EmitStoreState();
context.TranslateAhead(imm); context.TranslateAhead(imm);
context.EmitLdc_I8(imm); context.EmitLdc_I8(imm);
@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions
if (!context.TryOptEmitSubroutineCall()) if (!context.TryOptEmitSubroutineCall())
{ {
context.HasSlowCall = true;
context.EmitStoreState();
context.TranslateAhead(imm); context.TranslateAhead(imm);
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm); context.EmitLdc_I8(imm);
context.EmitLdc_I4((int)CallType.Call);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions
{ {
if (context.Tier == TranslationTier.Tier0) if (context.Tier == TranslationTier.Tier0)
{ {
context.Emit(OpCodes.Dup);
context.EmitSttmp();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
context.Emit(OpCodes.Ret); context.Emit(OpCodes.Ret);
} }
else else
@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp(); context.EmitLdtmp();
context.EmitLdc_I4(isJump
? (int)CallType.VirtualJump
: (int)CallType.VirtualCall);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine)); context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdarg(TranslatedSub.MemoryArgIdx);

View file

@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86;
public static class Optimizations public static class Optimizations
{ {
internal static bool FastFP = true; public static bool AssumeStrictAbiCompliance { get; set; }
private static bool _useAllSseIfAvailable = true; public static bool FastFP { get; set; } = true;
private static bool _useSseIfAvailable = true; private const bool UseAllSseIfAvailable = true;
private static bool _useSse2IfAvailable = true;
private static bool _useSse3IfAvailable = true;
private static bool _useSsse3IfAvailable = true;
private static bool _useSse41IfAvailable = true;
private static bool _useSse42IfAvailable = true;
internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported; public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported; public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported; public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported; public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported; public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported; public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
} }

View file

@ -0,0 +1,9 @@
namespace ChocolArm64.Translation
{
//Identifies the kind of call site that is requesting a subroutine.
//The emitted code loads one of these values as an Int32 constant right
//before calling Translator.GetOrTranslateSubroutine.
//NOTE(review): how the translator reacts to each value is not visible
//here -- confirm against Translator.GetOrTranslateSubroutine.
enum CallType
{
//Used by the call emitter when the target address is an immediate value.
Call,
//Used by the virtual (target address computed at runtime) emitter
//when the "isJump" flag is false.
VirtualCall,
//Used by the virtual (target address computed at runtime) emitter
//when the "isJump" flag is true.
VirtualJump
}
}

View file

@ -6,11 +6,11 @@ namespace ChocolArm64.Translation
{ {
public long IntInputs { get; private set; } public long IntInputs { get; private set; }
public long IntOutputs { get; private set; } public long IntOutputs { get; private set; }
public long IntAwOutputs { get; private set; } private long _intAwOutputs;
public long VecInputs { get; private set; } public long VecInputs { get; private set; }
public long VecOutputs { get; private set; } public long VecOutputs { get; private set; }
public long VecAwOutputs { get; private set; } private long _vecAwOutputs;
public bool HasStateStore { get; private set; } public bool HasStateStore { get; private set; }
@ -34,25 +34,25 @@ namespace ChocolArm64.Translation
//opcodes emitted by each ARM instruction. //opcodes emitted by each ARM instruction.
//We can only consider the new outputs for doing input elimination //We can only consider the new outputs for doing input elimination
//after all the CIL opcodes used by the instruction being emitted. //after all the CIL opcodes used by the instruction being emitted.
IntAwOutputs = IntOutputs; _intAwOutputs = IntOutputs;
VecAwOutputs = VecOutputs; _vecAwOutputs = VecOutputs;
} }
else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index)) else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
{ {
switch (ld.IoType) switch (ld.VarType)
{ {
case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break; case VarType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break;
case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break; case VarType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break;
case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break; case VarType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break;
} }
} }
else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index)) else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
{ {
switch (st.IoType) switch (st.VarType)
{ {
case IoType.Flag: IntOutputs |= (1L << st.Index) << 32; break; case VarType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
case IoType.Int: IntOutputs |= 1L << st.Index; break; case VarType.Int: IntOutputs |= 1L << st.Index; break;
case IoType.Vector: VecOutputs |= 1L << st.Index; break; case VarType.Vector: VecOutputs |= 1L << st.Index; break;
} }
} }
else if (emitter is ILOpCodeStoreState) else if (emitter is ILOpCodeStoreState)

View file

@ -31,6 +31,10 @@ namespace ChocolArm64.Translation
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
public bool HasIndirectJump { get; set; }
public bool HasSlowCall { get; set; }
private Dictionary<Block, ILBlock> _visitedBlocks; private Dictionary<Block, ILBlock> _visitedBlocks;
private Queue<Block> _branchTargets; private Queue<Block> _branchTargets;
@ -91,7 +95,12 @@ namespace ChocolArm64.Translation
ResetBlockState(); ResetBlockState();
AdvanceOpCode(); if (AdvanceOpCode())
{
EmitSynchronization();
_ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
}
} }
public static int GetIntTempIndex() public static int GetIntTempIndex()
@ -127,10 +136,18 @@ namespace ChocolArm64.Translation
return; return;
} }
if (_opcIndex == 0) int opcIndex = _opcIndex;
if (opcIndex == 0)
{ {
MarkLabel(GetLabel(_currBlock.Position)); MarkLabel(GetLabel(_currBlock.Position));
}
bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
if (isLastOp && CurrBlock.Branch != null &&
(ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
{
EmitSynchronization(); EmitSynchronization();
} }
@ -161,7 +178,7 @@ namespace ChocolArm64.Translation
//of the next instruction to be executed (in the case that the condition //of the next instruction to be executed (in the case that the condition
//is false, and the branch was not taken, as all basic blocks should end with //is false, and the branch was not taken, as all basic blocks should end with
//some kind of branch). //some kind of branch).
if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null) if (isLastOp && CurrBlock.Next == null)
{ {
EmitStoreState(); EmitStoreState();
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes); EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
@ -285,32 +302,43 @@ namespace ChocolArm64.Translation
return; return;
} }
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1)); _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
} }
public bool TryOptEmitSubroutineCall() public bool TryOptEmitSubroutineCall()
{ {
//Calls should always have a next block, unless
//we're translating a single basic block.
if (_currBlock.Next == null) if (_currBlock.Next == null)
{ {
return false; return false;
} }
if (CurrOp.Emitter != InstEmit.Bl) if (!(CurrOp is IOpCodeBImm op))
{ {
return false; return false;
} }
if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine)) if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
{ {
return false; return false;
} }
//It's not worth to call a Tier0 method, because
//it contains slow code, rather than the entire function.
if (sub.Tier == TranslationTier.Tier0)
{
return false;
}
EmitStoreState(sub);
for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++) for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++)
{ {
EmitLdarg(index); EmitLdarg(index);
} }
EmitCall(subroutine.Method); EmitCall(sub.Method);
return true; return true;
} }
@ -321,8 +349,8 @@ namespace ChocolArm64.Translation
InstEmitAluHelper.EmitAluLoadOpers(this); InstEmitAluHelper.EmitAluLoadOpers(this);
Stloc(CmpOptTmp2Index, IoType.Int); Stloc(CmpOptTmp2Index, VarType.Int);
Stloc(CmpOptTmp1Index, IoType.Int); Stloc(CmpOptTmp1Index, VarType.Int);
} }
private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>() private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>()
@ -346,8 +374,8 @@ namespace ChocolArm64.Translation
{ {
if (_optOpLastCompare.Emitter == InstEmit.Subs) if (_optOpLastCompare.Emitter == InstEmit.Subs)
{ {
Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize); Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize);
Emit(_branchOps[cond], target); Emit(_branchOps[cond], target);
@ -369,7 +397,7 @@ namespace ChocolArm64.Translation
//Such invalid values can't be encoded on the immediate encodings. //Such invalid values can't be encoded on the immediate encodings.
if (_optOpLastCompare is IOpCodeAluImm64 op) if (_optOpLastCompare is IOpCodeAluImm64 op)
{ {
Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
if (_optOpLastCompare.RegisterSize == RegisterSize.Int32) if (_optOpLastCompare.RegisterSize == RegisterSize.Int32)
{ {
@ -491,14 +519,14 @@ namespace ChocolArm64.Translation
{ {
if (amount > 0) if (amount > 0)
{ {
Stloc(RorTmpIndex, IoType.Int); Stloc(RorTmpIndex, VarType.Int);
Ldloc(RorTmpIndex, IoType.Int); Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(amount); EmitLdc_I4(amount);
Emit(OpCodes.Shr_Un); Emit(OpCodes.Shr_Un);
Ldloc(RorTmpIndex, IoType.Int); Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(CurrOp.GetBitsCount() - amount); EmitLdc_I4(CurrOp.GetBitsCount() - amount);
@ -546,7 +574,7 @@ namespace ChocolArm64.Translation
public void EmitLdarg(int index) public void EmitLdarg(int index)
{ {
_ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg)); _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg));
} }
public void EmitLdintzr(int index) public void EmitLdintzr(int index)
@ -588,6 +616,11 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock)); _ilBlock.Add(new ILOpCodeStoreState(_ilBlock));
} }
private void EmitStoreState(TranslatedSub callSub)
{
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub));
}
public void EmitLdtmp() => EmitLdint(IntGpTmp1Index); public void EmitLdtmp() => EmitLdint(IntGpTmp1Index);
public void EmitSttmp() => EmitStint(IntGpTmp1Index); public void EmitSttmp() => EmitStint(IntGpTmp1Index);
@ -600,13 +633,13 @@ namespace ChocolArm64.Translation
public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
public void EmitLdint(int index) => Ldloc(index, IoType.Int); public void EmitLdint(int index) => Ldloc(index, VarType.Int);
public void EmitStint(int index) => Stloc(index, IoType.Int); public void EmitStint(int index) => Stloc(index, VarType.Int);
public void EmitLdvec(int index) => Ldloc(index, IoType.Vector); public void EmitLdvec(int index) => Ldloc(index, VarType.Vector);
public void EmitStvec(int index) => Stloc(index, IoType.Vector); public void EmitStvec(int index) => Stloc(index, VarType.Vector);
public void EmitLdflg(int index) => Ldloc(index, IoType.Flag); public void EmitLdflg(int index) => Ldloc(index, VarType.Flag);
public void EmitStflg(int index) public void EmitStflg(int index)
{ {
//Set this only if any of the NZCV flag bits were modified. //Set this only if any of the NZCV flag bits were modified.
@ -619,22 +652,22 @@ namespace ChocolArm64.Translation
_optOpLastFlagSet = CurrOp; _optOpLastFlagSet = CurrOp;
} }
Stloc(index, IoType.Flag); Stloc(index, VarType.Flag);
} }
private void Ldloc(int index, IoType ioType) private void Ldloc(int index, VarType varType)
{ {
_ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize)); _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize));
} }
private void Ldloc(int index, IoType ioType, RegisterSize registerSize) private void Ldloc(int index, VarType varType, RegisterSize registerSize)
{ {
_ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize)); _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize));
} }
private void Stloc(int index, IoType ioType) private void Stloc(int index, VarType varType)
{ {
_ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize)); _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize));
} }
public void EmitCallPropGet(Type objType, string propName) public void EmitCallPropGet(Type objType, string propName)

View file

@ -6,7 +6,7 @@ namespace ChocolArm64.Translation
{ {
private bool _hasLabel; private bool _hasLabel;
private Label _lbl; private Label _label;
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
@ -17,12 +17,12 @@ namespace ChocolArm64.Translation
{ {
if (!_hasLabel) if (!_hasLabel)
{ {
_lbl = context.Generator.DefineLabel(); _label = context.Generator.DefineLabel();
_hasLabel = true; _hasLabel = true;
} }
return _lbl; return _label;
} }
} }
} }

View file

@ -8,7 +8,10 @@ namespace ChocolArm64.Translation
{ {
class ILMethodBuilder class ILMethodBuilder
{ {
public LocalAlloc LocalAlloc { get; private set; } private const int RegsCount = 32;
private const int RegsMask = RegsCount - 1;
public RegisterUsage RegUsage { get; private set; }
public ILGenerator Generator { get; private set; } public ILGenerator Generator { get; private set; }
@ -18,29 +21,47 @@ namespace ChocolArm64.Translation
private string _subName; private string _subName;
public bool IsAarch64 { get; }
public bool IsSubComplete { get; }
private int _localsCount; private int _localsCount;
public ILMethodBuilder(ILBlock[] ilBlocks, string subName) public ILMethodBuilder(
ILBlock[] ilBlocks,
string subName,
bool isAarch64,
bool isSubComplete = false)
{ {
_ilBlocks = ilBlocks; _ilBlocks = ilBlocks;
_subName = subName; _subName = subName;
IsAarch64 = isAarch64;
IsSubComplete = isSubComplete;
} }
public TranslatedSub GetSubroutine(TranslationTier tier) public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
{ {
LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); RegUsage = new RegisterUsage();
RegUsage.BuildUses(_ilBlocks[0]);
DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes); DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
Generator = method.GetILGenerator(); long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
TranslatedSub subroutine = new TranslatedSub(method, tier); TranslatedSub subroutine = new TranslatedSub(
method,
intNiRegsMask,
vecNiRegsMask,
tier,
isWorthOptimizing);
_locals = new Dictionary<Register, int>(); _locals = new Dictionary<Register, int>();
_localsCount = 0; _localsCount = 0;
new ILOpCodeLoadState(_ilBlocks[0]).Emit(this); Generator = method.GetILGenerator();
foreach (ILBlock ilBlock in _ilBlocks) foreach (ILBlock ilBlock in _ilBlocks)
{ {
@ -80,13 +101,13 @@ namespace ChocolArm64.Translation
public static Register GetRegFromBit(int bit, RegisterType baseType) public static Register GetRegFromBit(int bit, RegisterType baseType)
{ {
if (bit < 32) if (bit < RegsCount)
{ {
return new Register(bit, baseType); return new Register(bit, baseType);
} }
else if (baseType == RegisterType.Int) else if (baseType == RegisterType.Int)
{ {
return new Register(bit & 0x1f, RegisterType.Flag); return new Register(bit & RegsMask, RegisterType.Flag);
} }
else else
{ {
@ -96,7 +117,7 @@ namespace ChocolArm64.Translation
public static bool IsRegIndex(int index) public static bool IsRegIndex(int index)
{ {
return (uint)index < 32; return (uint)index < RegsCount;
} }
} }
} }

View file

@ -4,16 +4,16 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCode : IILEmit struct ILOpCode : IILEmit
{ {
private OpCode _ilOp; public OpCode ILOp { get; }
public ILOpCode(OpCode ilOp) public ILOpCode(OpCode ilOp)
{ {
_ilOp = ilOp; ILOp = ilOp;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
context.Generator.Emit(_ilOp); context.Generator.Emit(ILOp);
} }
} }
} }

View file

@ -4,18 +4,18 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeBranch : IILEmit struct ILOpCodeBranch : IILEmit
{ {
private OpCode _ilOp; public OpCode ILOp { get; }
private ILLabel _label; public ILLabel Label { get; }
public ILOpCodeBranch(OpCode ilOp, ILLabel label) public ILOpCodeBranch(OpCode ilOp, ILLabel label)
{ {
_ilOp = ilOp; ILOp = ilOp;
_label = label; Label = label;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
context.Generator.Emit(_ilOp, _label.GetLabel(context)); context.Generator.Emit(ILOp, Label.GetLabel(context));
} }
} }
} }

View file

@ -5,9 +5,9 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeCall : IILEmit struct ILOpCodeCall : IILEmit
{ {
public MethodInfo Info { get; private set; } public MethodInfo Info { get; }
public bool IsVirtual { get; private set; } public bool IsVirtual { get; }
public ILOpCodeCall(MethodInfo info, bool isVirtual) public ILOpCodeCall(MethodInfo info, bool isVirtual)
{ {

View file

@ -16,6 +16,8 @@ namespace ChocolArm64.Translation
private ImmVal _value; private ImmVal _value;
public long Value => _value.I8;
private enum ConstType private enum ConstType
{ {
Int32, Int32,

View file

@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeLoad : IILEmit struct ILOpCodeLoad : IILEmit
{ {
public int Index { get; private set; } public int Index { get; }
public IoType IoType { get; private set; } public VarType VarType { get; }
public RegisterSize RegisterSize { get; private set; } public RegisterSize RegisterSize { get; }
public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0) public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0)
{ {
Index = index; Index = index;
IoType = ioType; VarType = varType;
RegisterSize = registerSize; RegisterSize = registerSize;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
switch (IoType) switch (VarType)
{ {
case IoType.Arg: context.Generator.EmitLdarg(Index); break; case VarType.Arg: context.Generator.EmitLdarg(Index); break;
case IoType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break; case VarType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
case IoType.Int: EmitLdloc(context, Index, RegisterType.Int); break; case VarType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
} }
} }

View file

@ -5,7 +5,7 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeLoadField : IILEmit struct ILOpCodeLoadField : IILEmit
{ {
public FieldInfo Info { get; private set; } public FieldInfo Info { get; }
public ILOpCodeLoadField(FieldInfo info) public ILOpCodeLoadField(FieldInfo info)
{ {

View file

@ -7,15 +7,24 @@ namespace ChocolArm64.Translation
{ {
private ILBlock _block; private ILBlock _block;
public ILOpCodeLoadState(ILBlock block) private bool _isSubEntry;
public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
{ {
_block = block; _block = block;
_isSubEntry = isSubEntry;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
long intInputs = context.LocalAlloc.GetIntInputs(_block); long intInputs = context.RegUsage.GetIntInputs(_block);
long vecInputs = context.LocalAlloc.GetVecInputs(_block); long vecInputs = context.RegUsage.GetVecInputs(_block);
if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
{
intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64);
vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64);
}
LoadLocals(context, intInputs, RegisterType.Int); LoadLocals(context, intInputs, RegisterType.Int);
LoadLocals(context, vecInputs, RegisterType.Vector); LoadLocals(context, vecInputs, RegisterType.Vector);

View file

@ -2,16 +2,16 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeLog : IILEmit struct ILOpCodeLog : IILEmit
{ {
private string _text; public string Text { get; }
public ILOpCodeLog(string text) public ILOpCodeLog(string text)
{ {
_text = text; Text = text;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
context.Generator.EmitWriteLine(_text); context.Generator.EmitWriteLine(Text);
} }
} }
} }

View file

@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{ {
struct ILOpCodeStore : IILEmit struct ILOpCodeStore : IILEmit
{ {
public int Index { get; private set; } public int Index { get; }
public IoType IoType { get; private set; } public VarType VarType { get; }
public RegisterSize RegisterSize { get; private set; } public RegisterSize RegisterSize { get; }
public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0) public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0)
{ {
Index = index; Index = index;
IoType = ioType; VarType = varType;
RegisterSize = registerSize; RegisterSize = registerSize;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
switch (IoType) switch (VarType)
{ {
case IoType.Arg: context.Generator.EmitStarg(Index); break; case VarType.Arg: context.Generator.EmitStarg(Index); break;
case IoType.Flag: EmitStloc(context, Index, RegisterType.Flag); break; case VarType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
case IoType.Int: EmitStloc(context, Index, RegisterType.Int); break; case VarType.Int: EmitStloc(context, Index, RegisterType.Int); break;
case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
} }
} }

View file

@ -7,15 +7,33 @@ namespace ChocolArm64.Translation
{ {
private ILBlock _block; private ILBlock _block;
public ILOpCodeStoreState(ILBlock block) private TranslatedSub _callSub;
public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null)
{ {
_block = block; _block = block;
_callSub = callSub;
} }
public void Emit(ILMethodBuilder context) public void Emit(ILMethodBuilder context)
{ {
long intOutputs = context.LocalAlloc.GetIntOutputs(_block); long intOutputs = context.RegUsage.GetIntOutputs(_block);
long vecOutputs = context.LocalAlloc.GetVecOutputs(_block); long vecOutputs = context.RegUsage.GetVecOutputs(_block);
if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
{
intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64);
vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64);
}
if (_callSub != null)
{
//Those register are assigned on the callee function, without
//reading it's value first. We don't need to write them because
//they are not going to be read on the callee.
intOutputs &= ~_callSub.IntNiRegsMask;
vecOutputs &= ~_callSub.VecNiRegsMask;
}
StoreLocals(context, intOutputs, RegisterType.Int); StoreLocals(context, intOutputs, RegisterType.Int);
StoreLocals(context, vecOutputs, RegisterType.Vector); StoreLocals(context, vecOutputs, RegisterType.Vector);

View file

@ -3,8 +3,13 @@ using System.Collections.Generic;
namespace ChocolArm64.Translation namespace ChocolArm64.Translation
{ {
class LocalAlloc class RegisterUsage
{ {
public const long CallerSavedIntRegistersMask = 0x7fL << 9;
public const long PStateNzcvFlagsMask = 0xfL << 60;
public const long CallerSavedVecRegistersMask = 0xffffL << 16;
private class PathIo private class PathIo
{ {
private Dictionary<ILBlock, long> _allInputs; private Dictionary<ILBlock, long> _allInputs;
@ -18,31 +23,30 @@ namespace ChocolArm64.Translation
_cmnOutputs = new Dictionary<ILBlock, long>(); _cmnOutputs = new Dictionary<ILBlock, long>();
} }
public PathIo(ILBlock root, long inputs, long outputs) : this() public void Set(ILBlock entry, long inputs, long outputs)
{ {
Set(root, inputs, outputs); if (!_allInputs.TryAdd(entry, inputs))
{
_allInputs[entry] |= inputs;
} }
public void Set(ILBlock root, long inputs, long outputs) if (!_cmnOutputs.TryAdd(entry, outputs))
{ {
if (!_allInputs.TryAdd(root, inputs)) _cmnOutputs[entry] &= outputs;
{
_allInputs[root] |= inputs;
}
if (!_cmnOutputs.TryAdd(root, outputs))
{
_cmnOutputs[root] &= outputs;
} }
_allOutputs |= outputs; _allOutputs |= outputs;
} }
public long GetInputs(ILBlock root) public long GetInputs(ILBlock entry)
{ {
if (_allInputs.TryGetValue(root, out long inputs)) if (_allInputs.TryGetValue(entry, out long inputs))
{ {
return inputs | (_allOutputs & ~_cmnOutputs[root]); //We also need to read the registers that may not be written
//by all paths that can reach a exit point, to ensure that
//the local variable will not remain uninitialized depending
//on the flow path taken.
return inputs | (_allOutputs & ~_cmnOutputs[entry]);
} }
return 0; return 0;
@ -57,15 +61,38 @@ namespace ChocolArm64.Translation
private Dictionary<ILBlock, PathIo> _intPaths; private Dictionary<ILBlock, PathIo> _intPaths;
private Dictionary<ILBlock, PathIo> _vecPaths; private Dictionary<ILBlock, PathIo> _vecPaths;
private struct BlockIo private struct BlockIo : IEquatable<BlockIo>
{ {
public ILBlock Block; public ILBlock Block { get; }
public ILBlock Entry; public ILBlock Entry { get; }
public long IntInputs; public long IntInputs { get; set; }
public long VecInputs; public long VecInputs { get; set; }
public long IntOutputs; public long IntOutputs { get; set; }
public long VecOutputs; public long VecOutputs { get; set; }
public BlockIo(ILBlock block, ILBlock entry)
{
Block = block;
Entry = entry;
IntInputs = IntOutputs = 0;
VecInputs = VecOutputs = 0;
}
public BlockIo(
ILBlock block,
ILBlock entry,
long intInputs,
long vecInputs,
long intOutputs,
long vecOutputs) : this(block, entry)
{
IntInputs = intInputs;
VecInputs = vecInputs;
IntOutputs = intOutputs;
VecOutputs = vecOutputs;
}
public override bool Equals(object obj) public override bool Equals(object obj)
{ {
@ -74,6 +101,11 @@ namespace ChocolArm64.Translation
return false; return false;
} }
return Equals(other);
}
public bool Equals(BlockIo other)
{
return other.Block == Block && return other.Block == Block &&
other.Entry == Entry && other.Entry == Entry &&
other.IntInputs == IntInputs && other.IntInputs == IntInputs &&
@ -98,25 +130,13 @@ namespace ChocolArm64.Translation
} }
} }
private const int MaxOptGraphLength = 40; public RegisterUsage()
public LocalAlloc(ILBlock[] graph, ILBlock entry)
{ {
_intPaths = new Dictionary<ILBlock, PathIo>(); _intPaths = new Dictionary<ILBlock, PathIo>();
_vecPaths = new Dictionary<ILBlock, PathIo>(); _vecPaths = new Dictionary<ILBlock, PathIo>();
if (graph.Length > 1 &&
graph.Length < MaxOptGraphLength)
{
InitializeOptimal(graph, entry);
}
else
{
InitializeFast(graph);
}
} }
private void InitializeOptimal(ILBlock[] graph, ILBlock entry) public void BuildUses(ILBlock entry)
{ {
//This will go through all possible paths on the graph, //This will go through all possible paths on the graph,
//and store all inputs/outputs for each block. A register //and store all inputs/outputs for each block. A register
@ -124,7 +144,7 @@ namespace ChocolArm64.Translation
//When a block can be reached by more than one path, then the //When a block can be reached by more than one path, then the
//output from all paths needs to be set for this block, and //output from all paths needs to be set for this block, and
//only outputs present in all of the parent blocks can be considered //only outputs present in all of the parent blocks can be considered
//when doing input elimination. Each block chain have a entry, that's where //when doing input elimination. Each block chain has a entry, that's where
//the code starts executing. They are present on the subroutine start point, //the code starts executing. They are present on the subroutine start point,
//and on call return points too (address written to X30 by BL). //and on call return points too (address written to X30 by BL).
HashSet<BlockIo> visited = new HashSet<BlockIo>(); HashSet<BlockIo> visited = new HashSet<BlockIo>();
@ -133,19 +153,13 @@ namespace ChocolArm64.Translation
void Enqueue(BlockIo block) void Enqueue(BlockIo block)
{ {
if (!visited.Contains(block)) if (visited.Add(block))
{ {
unvisited.Enqueue(block); unvisited.Enqueue(block);
visited.Add(block);
} }
} }
Enqueue(new BlockIo() Enqueue(new BlockIo(entry, entry));
{
Block = entry,
Entry = entry
});
while (unvisited.Count > 0) while (unvisited.Count > 0)
{ {
@ -177,19 +191,21 @@ namespace ChocolArm64.Translation
void EnqueueFromCurrent(ILBlock block, bool retTarget) void EnqueueFromCurrent(ILBlock block, bool retTarget)
{ {
BlockIo blockIo = new BlockIo() { Block = block }; BlockIo blockIo;
if (retTarget) if (retTarget)
{ {
blockIo.Entry = block; blockIo = new BlockIo(block, block);
} }
else else
{ {
blockIo.Entry = current.Entry; blockIo = new BlockIo(
blockIo.IntInputs = current.IntInputs; block,
blockIo.VecInputs = current.VecInputs; current.Entry,
blockIo.IntOutputs = current.IntOutputs; current.IntInputs,
blockIo.VecOutputs = current.VecOutputs; current.VecInputs,
current.IntOutputs,
current.VecOutputs);
} }
Enqueue(blockIo); Enqueue(blockIo);
@ -207,54 +223,63 @@ namespace ChocolArm64.Translation
} }
} }
private void InitializeFast(ILBlock[] graph) public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values);
{ public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values);
//This is WAY faster than InitializeOptimal, but results in
//unneeded loads and stores, so the resulting code will be slower.
long intInputs = 0, intOutputs = 0;
long vecInputs = 0, vecOutputs = 0;
foreach (ILBlock block in graph) private long GetInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
{
intInputs |= block.IntInputs;
intOutputs |= block.IntOutputs;
vecInputs |= block.VecInputs;
vecOutputs |= block.VecOutputs;
}
//It's possible that not all code paths writes to those output registers,
//in those cases if we attempt to write an output registers that was
//not written, we will be just writing zero and messing up the old register value.
//So we just need to ensure that all outputs are loaded.
if (graph.Length > 1)
{
intInputs |= intOutputs;
vecInputs |= vecOutputs;
}
foreach (ILBlock block in graph)
{
_intPaths.Add(block, new PathIo(block, intInputs, intOutputs));
_vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs));
}
}
public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
private long GetInputsImpl(ILBlock root, IEnumerable<PathIo> values)
{ {
long inputs = 0; long inputs = 0;
foreach (PathIo path in values) foreach (PathIo path in values)
{ {
inputs |= path.GetInputs(root); inputs |= path.GetInputs(entry);
} }
return inputs; return inputs;
} }
//Mask of integer registers that are written before being read, for the given entry.
public long GetIntNotInputs(ILBlock entry)
{
    return GetNotInputsImpl(entry, _intPaths.Values);
}

//Mask of vector registers that are written before being read, for the given entry.
public long GetVecNotInputs(ILBlock entry)
{
    return GetNotInputsImpl(entry, _vecPaths.Values);
}
private long GetNotInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
{
    //Builds a mask with the registers that are written to
    //before being read. A bit only stays set when, on every
    //path, the register is an output and is not an input
    //for the given entry.
    long notInputs = -1L;

    foreach (PathIo pathIo in values)
    {
        long written = pathIo.GetOutputs();
        long read    = pathIo.GetInputs(entry);

        notInputs &= written & ~read;
    }

    return notInputs;
}
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs(); public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs(); public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
{
    //Removes the bits set on CallerSavedIntRegistersMask and
    //PStateNzcvFlagsMask from the mask. When not on AArch64,
    //the mask is returned untouched.
    //TODO: ARM32 support.
    if (!isAarch64)
    {
        return mask;
    }

    return mask & ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
}
public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
{
    //Removes the bits set on CallerSavedVecRegistersMask from
    //the mask. When not on AArch64, the mask is returned untouched.
    //TODO: ARM32 support.
    if (!isAarch64)
    {
        return mask;
    }

    return mask & ~CallerSavedVecRegistersMask;
}
} }
} }

View file

@ -10,20 +10,40 @@ namespace ChocolArm64.Translation
class TranslatedSub class TranslatedSub
{ {
//This is the minimum number of calls needed for the method
//to be retranslated with higher quality code. It's only worth
//doing that for hot code.
private const int MinCallCountForOpt = 30;
public ArmSubroutine Delegate { get; private set; } public ArmSubroutine Delegate { get; private set; }
public static int StateArgIdx { get; private set; } public static int StateArgIdx { get; }
public static int MemoryArgIdx { get; private set; } public static int MemoryArgIdx { get; }
public static Type[] FixedArgTypes { get; private set; } public static Type[] FixedArgTypes { get; }
public DynamicMethod Method { get; private set; } public DynamicMethod Method { get; }
public TranslationTier Tier { get; private set; } public TranslationTier Tier { get; }
public TranslatedSub(DynamicMethod method, TranslationTier tier) public long IntNiRegsMask { get; }
public long VecNiRegsMask { get; }
private bool _isWorthOptimizing;
private int _callCount;
public TranslatedSub(
DynamicMethod method,
long intNiRegsMask,
long vecNiRegsMask,
TranslationTier tier,
bool isWorthOptimizing)
{ {
Method = method ?? throw new ArgumentNullException(nameof(method));; Method = method ?? throw new ArgumentNullException(nameof(method));;
IntNiRegsMask = intNiRegsMask;
VecNiRegsMask = vecNiRegsMask;
_isWorthOptimizing = isWorthOptimizing;
Tier = tier; Tier = tier;
} }
@ -61,5 +81,24 @@ namespace ChocolArm64.Translation
{ {
return Delegate(threadState, memory); return Delegate(threadState, memory);
} }
public bool IsWorthOptimizing()
{
    //The call counter is only incremented while the flag is still set,
    //the short-circuit on the condition below skips it otherwise.
    if (!_isWorthOptimizing || _callCount++ < MinCallCountForOpt)
    {
        return false;
    }

    //Clear the flag so that true is returned a single time,
    //this way it is added to the queue only once.
    _isWorthOptimizing = false;

    return true;
}
} }
} }

View file

@ -63,48 +63,36 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
} }
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
position = subroutine.Execute(state, _memory); position = sub.Execute(state, _memory);
} }
while (position != 0 && state.Running); while (position != 0 && state.Running);
state.CurrentTranslator = null; state.CurrentTranslator = null;
} }
internal void TranslateVirtualSubroutine(CpuThreadState state, long position) internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
}
}
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
{ {
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{ {
sub = TranslateLowCq(position, state.GetExecutionMode()); sub = TranslateLowCq(position, state.GetExecutionMode());
} }
if (sub.Tier == TranslationTier.Tier0) if (sub.IsWorthOptimizing())
{ {
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); bool isComplete = cs == CallType.Call ||
cs == CallType.VirtualCall;
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
} }
return sub.Delegate; return sub.Delegate;
} }
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
{
    //Use the cached subroutine for this position when there is one.
    if (_cache.TryGetSubroutine(position, out TranslatedSub cached))
    {
        return cached;
    }

    //Nothing on the cache, translate it now using the
    //execution mode the thread state reports.
    return TranslateLowCq(position, state.GetExecutionMode());
}
private void TranslateQueuedSubs() private void TranslateQueuedSubs()
{ {
while (_threadCount != 0) while (_threadCount != 0)
@ -124,7 +112,7 @@ namespace ChocolArm64.Translation
} }
else else
{ {
TranslateHighCq(item.Position, item.Mode); TranslateHighCq(item.Position, item.Mode, item.IsComplete);
} }
} }
else else
@ -142,14 +130,16 @@ namespace ChocolArm64.Translation
string subName = GetSubroutineName(position); string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
} }
private void TranslateHighCq(long position, ExecutionMode mode) private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
{ {
Block graph = Decoder.DecodeSubroutine(_memory, position, mode); Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
@ -159,9 +149,13 @@ namespace ChocolArm64.Translation
string subName = GetSubroutineName(position); string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); isComplete &= !context.HasIndirectJump;
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0; int ilOpCount = 0;
@ -170,9 +164,11 @@ namespace ChocolArm64.Translation
ilOpCount += ilBlock.Count; ilOpCount += ilBlock.Count;
} }
ForceAheadOfTimeCompilation(subroutine);
_cache.AddOrUpdate(position, subroutine, ilOpCount); _cache.AddOrUpdate(position, subroutine, ilOpCount);
ForceAheadOfTimeCompilation(subroutine); return subroutine;
} }
private string GetSubroutineName(long position) private string GetSubroutineName(long position)

View file

@ -1,3 +1,4 @@
using ChocolArm64.State;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Threading; using System.Threading;
@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
{ {
class TranslatorQueue class TranslatorQueue
{ {
//This is the maximum number of functions to be translated that the queue can hold.
//The value may need some tuning to find the sweet spot.
private const int MaxQueueSize = 1024;
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue; private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
private ManualResetEvent _queueDataReceivedEvent; private ManualResetEvent _queueDataReceivedEvent;
@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
_queueDataReceivedEvent = new ManualResetEvent(false); _queueDataReceivedEvent = new ManualResetEvent(false);
} }
public void Enqueue(TranslatorQueueItem item) public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
{ {
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier]; TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
if (queue.Count >= MaxQueueSize) ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
{
queue.TryPop(out _);
}
queue.Push(item); queue.Push(item);

View file

@ -10,11 +10,18 @@ namespace ChocolArm64.Translation
public TranslationTier Tier { get; } public TranslationTier Tier { get; }
public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier) public bool IsComplete { get; }
public TranslatorQueueItem(
long position,
ExecutionMode mode,
TranslationTier tier,
bool isComplete = false)
{ {
Position = position; Position = position;
Mode = mode; Mode = mode;
Tier = tier; Tier = tier;
IsComplete = isComplete;
} }
} }
} }

View file

@ -1,6 +1,6 @@
namespace ChocolArm64.Translation namespace ChocolArm64.Translation
{ {
enum IoType enum VarType
{ {
Arg, Arg,
Flag, Flag,

View file

@ -29,18 +29,21 @@
// System Language list: https://gist.github.com/HorrorTroll/b6e4a88d774c3c9b3bdf54d79a7ca43b // System Language list: https://gist.github.com/HorrorTroll/b6e4a88d774c3c9b3bdf54d79a7ca43b
"system_language": "AmericanEnglish", "system_language": "AmericanEnglish",
// Enable or Disable Docked Mode // Enable or disable Docked Mode
"docked_mode": false, "docked_mode": false,
// Enable or Disable Game Vsync // Enable or disable Game Vsync
"enable_vsync": true, "enable_vsync": true,
// Enable or Disable Multi-core scheduling of threads // Enable or disable Multi-core scheduling of threads
"enable_multicore_scheduling": true, "enable_multicore_scheduling": true,
// Enable integrity checks on Switch content files // Enable integrity checks on Switch content files
"enable_fs_integrity_checks": true, "enable_fs_integrity_checks": true,
// Enable or disable aggressive CPU optimizations
"enable_aggressive_cpu_opts": true,
// The primary controller's type // The primary controller's type
// Supported Values: Handheld, ProController, NpadPair, NpadLeft, NpadRight // Supported Values: Handheld, ProController, NpadPair, NpadLeft, NpadRight
"controller_type": "Handheld", "controller_type": "Handheld",

View file

@ -86,6 +86,11 @@ namespace Ryujinx
/// </summary> /// </summary>
public bool EnableFsIntegrityChecks { get; private set; } public bool EnableFsIntegrityChecks { get; private set; }
/// <summary>
/// Enable or Disable aggressive CPU optimizations
/// </summary>
public bool EnableAggressiveCpuOpts { get; private set; }
/// <summary> /// <summary>
/// The primary controller's type /// The primary controller's type
/// </summary> /// </summary>
@ -197,6 +202,11 @@ namespace Ryujinx
? IntegrityCheckLevel.ErrorOnInvalid ? IntegrityCheckLevel.ErrorOnInvalid
: IntegrityCheckLevel.None; : IntegrityCheckLevel.None;
if (Instance.EnableAggressiveCpuOpts)
{
Optimizations.AssumeStrictAbiCompliance = true;
}
if(Instance.GamepadControls.Enabled) if(Instance.GamepadControls.Enabled)
{ {
if (GamePad.GetName(Instance.GamepadControls.Index) == "Unmapped Controller") if (GamePad.GetName(Instance.GamepadControls.Index) == "Unmapped Controller")

View file

@ -17,6 +17,7 @@
"enable_vsync", "enable_vsync",
"enable_multicore_scheduling", "enable_multicore_scheduling",
"enable_fs_integrity_checks", "enable_fs_integrity_checks",
"enable_aggressive_cpu_opts",
"controller_type", "controller_type",
"keyboard_controls", "keyboard_controls",
"gamepad_controls" "gamepad_controls"
@ -399,6 +400,17 @@
false false
] ]
}, },
"enable_aggressive_cpu_opts": {
"$id": "#/properties/enable_aggressive_cpu_opts",
"type": "boolean",
"title": "Enable Aggressive CPU Optimizations",
"description": "Enable or disable aggressive CPU optimizations",
"default": true,
"examples": [
true,
false
]
},
"controller_type": { "controller_type": {
"$id": "#/properties/controller_type", "$id": "#/properties/controller_type",
"type": "string", "type": "string",