From 60db4c353099e8656a330ede03fdbe57a421fa47 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 2 Aug 2020 22:36:57 -0300 Subject: [PATCH] Implement a Macro JIT (#1445) * Implement a Macro JIT * Nit: space --- .../Engine/GPFifo/GPFifoClass.cs | 20 +- .../Engine/GPFifo/GPFifoProcessor.cs | 17 +- .../Engine/MME/AluOperation.cs | 15 + .../Engine/MME/AluRegOperation.cs | 18 + .../Engine/MME/AssignmentOperation.cs | 17 + Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs | 25 + Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs | 20 +- .../{ => Engine/MME}/MacroInterpreter.cs | 174 ++---- Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs | 39 ++ .../Engine/MME/MacroJitCompiler.cs | 516 ++++++++++++++++++ .../Engine/MME/MacroJitContext.cs | 57 ++ Ryujinx.Graphics.Gpu/GraphicsConfig.cs | 5 + Ryujinx.Graphics.Gpu/State/GpuState.cs | 10 +- 13 files changed, 774 insertions(+), 159 deletions(-) create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs rename Ryujinx.Graphics.Gpu/{ => Engine/MME}/MacroInterpreter.cs (73%) create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs index 958253ec..0e87aa3d 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs @@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo class GPFifoClass : IDeviceState { private readonly GpuContext _context; + private readonly GPFifoProcessor _parent; private readonly DeviceState _state; private const int MacrosCount = 0x80; @@ -24,18 +25,15 @@ namespace 
Ryujinx.Graphics.Gpu.Engine.GPFifo private readonly Macro[] _macros; private readonly int[] _macroCode; - /// - /// MME Shadow RAM Control. - /// - public ShadowRamControl ShadowCtrl { get; private set; } - /// /// Creates a new instance of the GPU General Purpose FIFO class. /// /// GPU context - public GPFifoClass(GpuContext context) + /// Parent GPU General Purpose FIFO processor + public GPFifoClass(GpuContext context, GPFifoProcessor parent) { _context = context; + _parent = parent; _state = new DeviceState(new Dictionary { { nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) }, @@ -155,7 +153,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo } /// - /// Send macro code/data to the MME + /// Sends macro code/data to the MME. /// /// Method call argument public void LoadMmeInstructionRam(int argument) @@ -164,7 +162,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo } /// - /// Bind a macro index to a position for the MME + /// Binds a macro index to a position for the MME /// /// Method call argument public void LoadMmeStartAddressRam(int argument) @@ -173,12 +171,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo } /// - /// Change the shadow RAM setting + /// Changes the shadow RAM control. 
/// /// Method call argument public void SetMmeShadowRamControl(int argument) { - ShadowCtrl = (ShadowRamControl)argument; + _parent.SetShadowRamControl((ShadowRamControl)argument); } /// @@ -208,7 +206,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// Current GPU state public void CallMme(int index, GpuState state) { - _macros[index].Execute(_macroCode, ShadowCtrl, state); + _macros[index].Execute(_macroCode, state); } } } diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs index 115361f3..32fd8b73 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs @@ -39,8 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo { _context = context; - _fifoClass = new GPFifoClass(context); - + _fifoClass = new GPFifoClass(context, this); _subChannels = new GpuState[8]; for (int index = 0; index < _subChannels.Length; index++) @@ -152,7 +151,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo } else if (meth.Method < 0xe00) { - _subChannels[meth.SubChannel].CallMethod(meth, _fifoClass.ShadowCtrl); + _subChannels[meth.SubChannel].CallMethod(meth); } else { @@ -175,5 +174,17 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo } } } + + /// + /// Sets the shadow ram control value of all sub-channels. + /// + /// New shadow ram control value + public void SetShadowRamControl(ShadowRamControl control) + { + for (int i = 0; i < _subChannels.Length; i++) + { + _subChannels[i].ShadowRamControl = control; + } + } } } diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs b/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs new file mode 100644 index 00000000..eeef9c67 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/AluOperation.cs @@ -0,0 +1,15 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// GPU Macro Arithmetic and Logic unit operation. 
+ /// + enum AluOperation + { + AluReg = 0, + AddImmediate = 1, + BitfieldReplace = 2, + BitfieldExtractLslImm = 3, + BitfieldExtractLslReg = 4, + ReadImmediate = 5 + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs b/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs new file mode 100644 index 00000000..f3e05d38 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/AluRegOperation.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// GPU Macro Arithmetic and Logic unit binary register-to-register operation. + /// + enum AluRegOperation + { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + BitwiseExclusiveOr = 8, + BitwiseOr = 9, + BitwiseAnd = 10, + BitwiseAndNot = 11, + BitwiseNotAnd = 12 + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs b/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs new file mode 100644 index 00000000..dc336026 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/AssignmentOperation.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// GPU Macro assignment operation. + /// + enum AssignmentOperation + { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMaddr = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMaddr = 5, + MoveAndSetMaddrThenFetchAndSend = 6, + MoveAndSetMaddrThenSendHigh = 7 + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs new file mode 100644 index 00000000..b056ecc8 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs @@ -0,0 +1,25 @@ +using Ryujinx.Graphics.Gpu.State; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Macro Execution Engine interface. + /// + interface IMacroEE + { + /// + /// Arguments FIFO. + /// + public Queue Fifo { get; } + + /// + /// Should execute the GPU Macro code being passed. 
+ /// + /// Code to be executed + /// GPU state at the time of the call + /// First argument to be passed to the GPU Macro + void Execute(ReadOnlySpan code, GpuState state, int arg0); + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs index 10127d11..9847f4c0 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.Gpu.State; +using System; namespace Ryujinx.Graphics.Gpu.Engine.MME { @@ -15,7 +16,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME private bool _executionPending; private int _argument; - private readonly MacroInterpreter _interpreter; + private readonly IMacroEE _executionEngine; /// /// Creates a new instance of the GPU cached macro program. @@ -28,7 +29,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME _executionPending = false; _argument = 0; - _interpreter = new MacroInterpreter(); + if (GraphicsConfig.EnableMacroJit) + { + _executionEngine = new MacroJit(); + } + else + { + _executionEngine = new MacroInterpreter(); + } } /// @@ -45,15 +53,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// /// Starts executing the macro program code. 
/// - /// Program code + /// Program code /// Current GPU state - public void Execute(int[] mme, ShadowRamControl shadowCtrl, GpuState state) + public void Execute(ReadOnlySpan code, GpuState state) { if (_executionPending) { _executionPending = false; - _interpreter?.Execute(mme, Position, _argument, shadowCtrl, state); + _executionEngine?.Execute(code.Slice(Position), state, _argument); } } @@ -63,7 +71,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// Argument to be pushed public void PushArgument(int argument) { - _interpreter?.Fifo.Enqueue(argument); + _executionEngine?.Fifo.Enqueue(argument); } } } diff --git a/Ryujinx.Graphics.Gpu/MacroInterpreter.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs similarity index 73% rename from Ryujinx.Graphics.Gpu/MacroInterpreter.cs rename to Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs index 8d2d6202..a48b263a 100644 --- a/Ryujinx.Graphics.Gpu/MacroInterpreter.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs @@ -3,48 +3,16 @@ using Ryujinx.Graphics.Gpu.State; using System; using System.Collections.Generic; -namespace Ryujinx.Graphics.Gpu +namespace Ryujinx.Graphics.Gpu.Engine.MME { /// /// Macro code interpreter. /// - class MacroInterpreter + class MacroInterpreter : IMacroEE { - private enum AssignmentOperation - { - IgnoreAndFetch = 0, - Move = 1, - MoveAndSetMaddr = 2, - FetchAndSend = 3, - MoveAndSend = 4, - FetchAndSetMaddr = 5, - MoveAndSetMaddrThenFetchAndSend = 6, - MoveAndSetMaddrThenSendHigh = 7 - } - - private enum AluOperation - { - AluReg = 0, - AddImmediate = 1, - BitfieldReplace = 2, - BitfieldExtractLslImm = 3, - BitfieldExtractLslReg = 4, - ReadImmediate = 5 - } - - private enum AluRegOperation - { - Add = 0, - AddWithCarry = 1, - Subtract = 2, - SubtractWithBorrow = 3, - BitwiseExclusiveOr = 8, - BitwiseOr = 9, - BitwiseAnd = 10, - BitwiseAndNot = 11, - BitwiseNotAnd = 12 - } - + /// + /// Arguments FIFO. 
+ /// public Queue Fifo { get; } private int[] _gprs; @@ -55,15 +23,12 @@ namespace Ryujinx.Graphics.Gpu private bool _carry; private int _opCode; - private int _pipeOp; private bool _ignoreExitFlag; private int _pc; - private ShadowRamControl _shadowCtrl; - /// /// Creates a new instance of the macro code interpreter. /// @@ -77,28 +42,24 @@ namespace Ryujinx.Graphics.Gpu /// /// Executes a macro program until it exits. /// - /// Code of the program to execute - /// Start position to execute - /// Optional argument passed to the program, 0 if not used - /// Shadow RAM control register value + /// Code of the program to execute /// Current GPU state - public void Execute(int[] mme, int position, int param, ShadowRamControl shadowCtrl, GpuState state) + /// Optional argument passed to the program, 0 if not used + public void Execute(ReadOnlySpan code, GpuState state, int arg0) { Reset(); - _gprs[1] = param; + _gprs[1] = arg0; - _pc = position; + _pc = 0; - _shadowCtrl = shadowCtrl; + FetchOpCode(code); - FetchOpCode(mme); - - while (Step(mme, state)); + while (Step(code, state)) ; // Due to the delay slot, we still need to execute // one more instruction before we actually exit. - Step(mme, state); + Step(code, state); } /// @@ -121,14 +82,14 @@ namespace Ryujinx.Graphics.Gpu /// /// Executes a single instruction of the program. /// - /// Program code to execute + /// Program code to execute /// Current GPU state /// True to continue execution, false if the program exited - private bool Step(int[] mme, GpuState state) + private bool Step(ReadOnlySpan code, GpuState state) { int baseAddr = _pc - 1; - FetchOpCode(mme); + FetchOpCode(code); if ((_opCode & 7) < 7) { @@ -141,83 +102,44 @@ namespace Ryujinx.Graphics.Gpu { // Fetch parameter and ignore result. case AssignmentOperation.IgnoreAndFetch: - { SetDstGpr(FetchParam()); - break; - } - // Move result. case AssignmentOperation.Move: - { SetDstGpr(result); - break; - } - // Move result and use as Method Address. 
case AssignmentOperation.MoveAndSetMaddr: - { SetDstGpr(result); - SetMethAddr(result); - break; - } - // Fetch parameter and send result. case AssignmentOperation.FetchAndSend: - { SetDstGpr(FetchParam()); - Send(state, result); - break; - } - // Move and send result. case AssignmentOperation.MoveAndSend: - { SetDstGpr(result); - Send(state, result); - break; - } - // Fetch parameter and use result as Method Address. case AssignmentOperation.FetchAndSetMaddr: - { SetDstGpr(FetchParam()); - SetMethAddr(result); - break; - } - // Move result and use as Method Address, then fetch and send parameter. case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend: - { SetDstGpr(result); - SetMethAddr(result); - Send(state, FetchParam()); - break; - } - // Move result and use as Method Address, then send bits 17:12 of result. case AssignmentOperation.MoveAndSetMaddrThenSendHigh: - { SetDstGpr(result); - SetMethAddr(result); - Send(state, (result >> 12) & 0x3f); - break; - } } } else @@ -237,7 +159,7 @@ namespace Ryujinx.Graphics.Gpu if (noDelays) { - FetchOpCode(mme); + FetchOpCode(code); } else { @@ -259,11 +181,11 @@ namespace Ryujinx.Graphics.Gpu /// /// Fetches a single operation code from the program code. 
/// - /// Program code - private void FetchOpCode(int[] mme) + /// Program code + private void FetchOpCode(ReadOnlySpan code) { _opCode = _pipeOp; - _pipeOp = mme[_pc++]; + _pipeOp = code[_pc++]; } /// @@ -278,23 +200,16 @@ namespace Ryujinx.Graphics.Gpu switch (op) { case AluOperation.AluReg: - { - AluRegOperation aluOp = (AluRegOperation)((_opCode >> 17) & 0x1f); - - return GetAluResult(aluOp, GetGprA(), GetGprB()); - } + return GetAluResult((AluRegOperation)((_opCode >> 17) & 0x1f), GetGprA(), GetGprB()); case AluOperation.AddImmediate: - { return GetGprA() + GetImm(); - } case AluOperation.BitfieldReplace: case AluOperation.BitfieldExtractLslImm: case AluOperation.BitfieldExtractLslReg: - { int bfSrcBit = (_opCode >> 17) & 0x1f; - int bfSize = (_opCode >> 22) & 0x1f; + int bfSize = (_opCode >> 22) & 0x1f; int bfDstBit = (_opCode >> 27) & 0x1f; int bfMask = (1 << bfSize) - 1; @@ -305,7 +220,6 @@ namespace Ryujinx.Graphics.Gpu switch (op) { case AluOperation.BitfieldReplace: - { src = (int)((uint)src >> bfSrcBit) & bfMask; dst &= ~(bfMask << bfDstBit); @@ -313,33 +227,25 @@ namespace Ryujinx.Graphics.Gpu dst |= src << bfDstBit; return dst; - } case AluOperation.BitfieldExtractLslImm: - { src = (int)((uint)src >> dst) & bfMask; return src << bfDstBit; - } case AluOperation.BitfieldExtractLslReg: - { src = (int)((uint)src >> bfSrcBit) & bfMask; return src << dst; - } } break; - } case AluOperation.ReadImmediate: - { return Read(state, GetGprA() + GetImm()); - } } - throw new ArgumentException(nameof(_opCode)); + throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{_opCode:X8}."); } /// @@ -351,52 +257,46 @@ namespace Ryujinx.Graphics.Gpu /// Operation result private int GetAluResult(AluRegOperation aluOp, int a, int b) { + ulong result; + switch (aluOp) { case AluRegOperation.Add: - { - ulong result = (ulong)a + (ulong)b; + result = (ulong)a + (ulong)b; _carry = result > 0xffffffff; return (int)result; - } case 
AluRegOperation.AddWithCarry: - { - ulong result = (ulong)a + (ulong)b + (_carry ? 1UL : 0UL); + result = (ulong)a + (ulong)b + (_carry ? 1UL : 0UL); _carry = result > 0xffffffff; return (int)result; - } case AluRegOperation.Subtract: - { - ulong result = (ulong)a - (ulong)b; + result = (ulong)a - (ulong)b; _carry = result < 0x100000000; return (int)result; - } case AluRegOperation.SubtractWithBorrow: - { - ulong result = (ulong)a - (ulong)b - (_carry ? 0UL : 1UL); + result = (ulong)a - (ulong)b - (_carry ? 0UL : 1UL); _carry = result < 0x100000000; return (int)result; - } - case AluRegOperation.BitwiseExclusiveOr: return a ^ b; - case AluRegOperation.BitwiseOr: return a | b; - case AluRegOperation.BitwiseAnd: return a & b; - case AluRegOperation.BitwiseAndNot: return a & ~b; - case AluRegOperation.BitwiseNotAnd: return ~(a & b); + case AluRegOperation.BitwiseExclusiveOr: return a ^ b; + case AluRegOperation.BitwiseOr: return a | b; + case AluRegOperation.BitwiseAnd: return a & b; + case AluRegOperation.BitwiseAndNot: return a & ~b; + case AluRegOperation.BitwiseNotAnd: return ~(a & b); } - throw new ArgumentOutOfRangeException(nameof(aluOp)); + throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{_opCode:X8}."); } /// @@ -415,7 +315,7 @@ namespace Ryujinx.Graphics.Gpu /// Packed address and increment value private void SetMethAddr(int value) { - _methAddr = (value >> 0) & 0xfff; + _methAddr = (value >> 0) & 0xfff; _methIncr = (value >> 12) & 0x3f; } @@ -492,7 +392,7 @@ namespace Ryujinx.Graphics.Gpu { MethodParams meth = new MethodParams(_methAddr, value); - state.CallMethod(meth, _shadowCtrl); + state.CallMethod(meth); _methAddr += _methIncr; } diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs new file mode 100644 index 00000000..346ae6cf --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs @@ -0,0 +1,39 @@ +using Ryujinx.Graphics.Gpu.State; +using System; +using 
System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Represents an execution engine that uses a Just-in-Time compiler for fast execution. + /// + class MacroJit : IMacroEE + { + private readonly MacroJitContext _context = new MacroJitContext(); + + /// + /// Arguments FIFO. + /// + public Queue Fifo => _context.Fifo; + + private MacroJitCompiler.MacroExecute _execute; + + /// + /// Executes a macro program until it exits. + /// + /// Code of the program to execute + /// Current GPU state + /// Optional argument passed to the program, 0 if not used + public void Execute(ReadOnlySpan code, GpuState state, int arg0) + { + if (_execute == null) + { + MacroJitCompiler compiler = new MacroJitCompiler(); + + _execute = compiler.Compile(code); + } + + _execute(_context, state, arg0); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs new file mode 100644 index 00000000..524f51e4 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs @@ -0,0 +1,516 @@ +using Ryujinx.Graphics.Gpu.State; +using System; +using System.Collections.Generic; +using System.Reflection.Emit; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Represents a Macro Just-in-Time compiler. + /// + class MacroJitCompiler + { + private readonly DynamicMethod _meth; + private readonly ILGenerator _ilGen; + private readonly LocalBuilder[] _gprs; + private readonly LocalBuilder _carry; + private readonly LocalBuilder _methAddr; + private readonly LocalBuilder _methIncr; + + /// + /// Creates a new instance of the Macro Just-in-Time compiler.
+ /// + public MacroJitCompiler() + { + _meth = new DynamicMethod("Macro", typeof(void), new Type[] { typeof(MacroJitContext), typeof(GpuState), typeof(int) }); + _ilGen = _meth.GetILGenerator(); + _gprs = new LocalBuilder[8]; + + for (int i = 1; i < 8; i++) + { + _gprs[i] = _ilGen.DeclareLocal(typeof(int)); + } + + _carry = _ilGen.DeclareLocal(typeof(int)); + _methAddr = _ilGen.DeclareLocal(typeof(int)); + _methIncr = _ilGen.DeclareLocal(typeof(int)); + + _ilGen.Emit(OpCodes.Ldarg_2); + _ilGen.Emit(OpCodes.Stloc, _gprs[1]); + } + + public delegate void MacroExecute(MacroJitContext context, GpuState state, int arg0); + + /// + /// Translates a new piece of GPU Macro code into host executable code. + /// + /// Code to be translated + /// Delegate of the host compiled code + public MacroExecute Compile(ReadOnlySpan code) + { + Dictionary labels = new Dictionary(); + + int lastTarget = 0; + int i; + + // Collect all branch targets. + for (i = 0; i < code.Length; i++) + { + int opCode = code[i]; + + if ((opCode & 7) == 7) + { + int target = i + (opCode >> 14); + + if (!labels.ContainsKey(target)) + { + labels.Add(target, _ilGen.DefineLabel()); + } + + if (lastTarget < target) + { + lastTarget = target; + } + } + + bool exit = (opCode & 0x80) != 0; + + if (exit && i >= lastTarget) + { + break; + } + } + + // Code generation. + for (i = 0; i < code.Length; i++) + { + if (labels.TryGetValue(i, out Label label)) + { + _ilGen.MarkLabel(label); + } + + Emit(code, i, labels); + + int opCode = code[i]; + + bool exit = (opCode & 0x80) != 0; + + if (exit) + { + Emit(code, i + 1, labels); + _ilGen.Emit(OpCodes.Ret); + + if (i >= lastTarget) + { + break; + } + } + } + + if (i == code.Length) + { + _ilGen.Emit(OpCodes.Ret); + } + + return (MacroExecute)_meth.CreateDelegate(typeof(MacroExecute)); + } + + /// + /// Emits IL equivalent to the Macro instruction at a given offset. 
+ /// + /// GPU Macro code + /// Offset, in words, where the instruction is located + /// Labels for Macro branch targets, used by branch instructions + private void Emit(ReadOnlySpan code, int offset, Dictionary labels) + { + int opCode = code[offset]; + + if ((opCode & 7) < 7) + { + // Operation produces a value. + AssignmentOperation asgOp = (AssignmentOperation)((opCode >> 4) & 7); + + EmitAluOp(opCode); + + switch (asgOp) + { + // Fetch parameter and ignore result. + case AssignmentOperation.IgnoreAndFetch: + _ilGen.Emit(OpCodes.Pop); + EmitFetchParam(); + EmitStoreDstGpr(opCode); + break; + // Move result. + case AssignmentOperation.Move: + EmitStoreDstGpr(opCode); + break; + // Move result and use as Method Address. + case AssignmentOperation.MoveAndSetMaddr: + _ilGen.Emit(OpCodes.Dup); + EmitStoreDstGpr(opCode); + EmitStoreMethAddr(); + break; + // Fetch parameter and send result. + case AssignmentOperation.FetchAndSend: + EmitFetchParam(); + EmitStoreDstGpr(opCode); + EmitSend(); + break; + // Move and send result. + case AssignmentOperation.MoveAndSend: + _ilGen.Emit(OpCodes.Dup); + EmitStoreDstGpr(opCode); + EmitSend(); + break; + // Fetch parameter and use result as Method Address. + case AssignmentOperation.FetchAndSetMaddr: + EmitFetchParam(); + EmitStoreDstGpr(opCode); + EmitStoreMethAddr(); + break; + // Move result and use as Method Address, then fetch and send parameter. + case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend: + _ilGen.Emit(OpCodes.Dup); + EmitStoreDstGpr(opCode); + EmitStoreMethAddr(); + EmitFetchParam(); + EmitSend(); + break; + // Move result and use as Method Address, then send bits 17:12 of result. 
+ case AssignmentOperation.MoveAndSetMaddrThenSendHigh: + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Dup); + EmitStoreDstGpr(opCode); + EmitStoreMethAddr(); + _ilGen.Emit(OpCodes.Ldc_I4, 12); + _ilGen.Emit(OpCodes.Shr_Un); + _ilGen.Emit(OpCodes.Ldc_I4, 0x3f); + _ilGen.Emit(OpCodes.And); + EmitSend(); + break; + } + } + else + { + // Branch. + bool onNotZero = ((opCode >> 4) & 1) != 0; + + EmitLoadGprA(opCode); + + Label lblSkip = _ilGen.DefineLabel(); + + if (onNotZero) + { + _ilGen.Emit(OpCodes.Brfalse, lblSkip); + } + else + { + _ilGen.Emit(OpCodes.Brtrue, lblSkip); + } + + bool noDelays = (opCode & 0x20) != 0; + + if (!noDelays) + { + Emit(code, offset + 1, labels); + } + + int target = offset + (opCode >> 14); + + _ilGen.Emit(OpCodes.Br, labels[target]); + + _ilGen.MarkLabel(lblSkip); + } + } + + /// + /// Emits IL for a Arithmetic and Logic Unit instruction. + /// + /// Instruction to be translated + /// Throw when the instruction encoding is invalid + private void EmitAluOp(int opCode) + { + AluOperation op = (AluOperation)(opCode & 7); + + switch (op) + { + case AluOperation.AluReg: + EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode); + break; + + case AluOperation.AddImmediate: + EmitLoadGprA(opCode); + EmitLoadImm(opCode); + _ilGen.Emit(OpCodes.Add); + break; + + case AluOperation.BitfieldReplace: + case AluOperation.BitfieldExtractLslImm: + case AluOperation.BitfieldExtractLslReg: + int bfSrcBit = (opCode >> 17) & 0x1f; + int bfSize = (opCode >> 22) & 0x1f; + int bfDstBit = (opCode >> 27) & 0x1f; + + int bfMask = (1 << bfSize) - 1; + + switch (op) + { + case AluOperation.BitfieldReplace: + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit); + _ilGen.Emit(OpCodes.Shr_Un); + _ilGen.Emit(OpCodes.Ldc_I4, bfMask); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit); + _ilGen.Emit(OpCodes.Shl); + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Ldc_I4, ~(bfMask << bfDstBit)); + _ilGen.Emit(OpCodes.And); + 
_ilGen.Emit(OpCodes.Or); + break; + + case AluOperation.BitfieldExtractLslImm: + EmitLoadGprB(opCode); + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Shr_Un); + _ilGen.Emit(OpCodes.Ldc_I4, bfMask); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Ldc_I4, bfDstBit); + _ilGen.Emit(OpCodes.Shl); + break; + + case AluOperation.BitfieldExtractLslReg: + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Ldc_I4, bfSrcBit); + _ilGen.Emit(OpCodes.Shr_Un); + _ilGen.Emit(OpCodes.Ldc_I4, bfMask); + _ilGen.Emit(OpCodes.And); + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Shl); + break; + } + break; + + case AluOperation.ReadImmediate: + _ilGen.Emit(OpCodes.Ldarg_1); + EmitLoadGprA(opCode); + EmitLoadImm(opCode); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read))); + break; + + default: + throw new InvalidOperationException($"Invalid operation \"{op}\" on instruction 0x{opCode:X8}."); + } + } + + /// + /// Emits IL for a binary Arithmetic and Logic Unit instruction. 
+ /// + /// Arithmetic and Logic Unit instruction + /// Raw instruction + /// Throw when the instruction encoding is invalid + private void EmitAluOp(AluRegOperation aluOp, int opCode) + { + switch (aluOp) + { + case AluRegOperation.Add: + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL); + _ilGen.Emit(OpCodes.Cgt_Un); + _ilGen.Emit(OpCodes.Stloc, _carry); + _ilGen.Emit(OpCodes.Conv_U4); + break; + case AluRegOperation.AddWithCarry: + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + _ilGen.Emit(OpCodes.Ldloc_S, _carry); + _ilGen.Emit(OpCodes.Conv_U8); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Ldc_I8, 0xffffffffL); + _ilGen.Emit(OpCodes.Cgt_Un); + _ilGen.Emit(OpCodes.Stloc, _carry); + _ilGen.Emit(OpCodes.Conv_U4); + break; + case AluRegOperation.Subtract: + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + _ilGen.Emit(OpCodes.Sub); + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L); + _ilGen.Emit(OpCodes.Clt_Un); + _ilGen.Emit(OpCodes.Stloc, _carry); + _ilGen.Emit(OpCodes.Conv_U4); + break; + case AluRegOperation.SubtractWithBorrow: + EmitLoadGprA(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Conv_U8); + _ilGen.Emit(OpCodes.Ldloc_S, _carry); + _ilGen.Emit(OpCodes.Conv_U8); + _ilGen.Emit(OpCodes.Neg); + _ilGen.Emit(OpCodes.Sub); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Ldc_I8, 0x100000000L); + _ilGen.Emit(OpCodes.Clt_Un); + _ilGen.Emit(OpCodes.Stloc, _carry); + _ilGen.Emit(OpCodes.Conv_U4); + break; + case AluRegOperation.BitwiseExclusiveOr: + EmitLoadGprA(opCode); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Xor); + break; + 
case AluRegOperation.BitwiseOr: + EmitLoadGprA(opCode); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Or); + break; + case AluRegOperation.BitwiseAnd: + EmitLoadGprA(opCode); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.And); + break; + case AluRegOperation.BitwiseAndNot: + EmitLoadGprA(opCode); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.Not); + _ilGen.Emit(OpCodes.And); + break; + case AluRegOperation.BitwiseNotAnd: + EmitLoadGprA(opCode); + EmitLoadGprB(opCode); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Not); + break; + default: + throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}."); + } + } + + /// + /// Loads an immediate value on the IL evaluation stack. + /// + /// Instruction from where the immediate should be extracted + private void EmitLoadImm(int opCode) + { + // Note: The immediate is signed, the sign-extension is intended here. + _ilGen.Emit(OpCodes.Ldc_I4, opCode >> 14); + } + + /// + /// Loads a value from the General Purpose register specified as first operand on the IL evaluation stack. + /// + /// Instruction from where the register number should be extracted + private void EmitLoadGprA(int opCode) + { + EmitLoadGpr((opCode >> 11) & 7); + } + + /// + /// Loads a value from the General Purpose register specified as second operand on the IL evaluation stack. + /// + /// Instruction from where the register number should be extracted + private void EmitLoadGprB(int opCode) + { + EmitLoadGpr((opCode >> 14) & 7); + } + + /// + /// Loads a value from a General Purpose register on the IL evaluation stack. + /// + /// + /// Register number 0 has a hardcoded value of 0. + /// + /// Register number + private void EmitLoadGpr(int index) + { + if (index == 0) + { + _ilGen.Emit(OpCodes.Ldc_I4_0); + } + else + { + _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]); + } + } + + /// + /// Emits a call to the method that fetches an argument from the arguments FIFO.
+ /// The argument is pushed into the IL evaluation stack. + /// + private void EmitFetchParam() + { + _ilGen.Emit(OpCodes.Ldarg_0); + _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam))); + } + + /// + /// Stores the value on the top of the IL evaluation stack into a General Purpose register. + /// + /// + /// Register number 0 does not exist, reads are hardcoded to 0, and writes are simply discarded. + /// + /// Instruction from where the register number should be extracted + private void EmitStoreDstGpr(int opCode) + { + int index = (opCode >> 8) & 7; + + if (index == 0) + { + _ilGen.Emit(OpCodes.Pop); + } + else + { + _ilGen.Emit(OpCodes.Stloc_S, _gprs[index]); + } + } + + /// + /// Stores the value on the top of the IL evaluation stack as method address. + /// This will be used on subsequent send calls as the destination method address. + /// Additionally, the 6 bits starting at bit 12 will be used as increment value, + /// added to the method address after each sent value. + /// + private void EmitStoreMethAddr() + { + _ilGen.Emit(OpCodes.Dup); + _ilGen.Emit(OpCodes.Ldc_I4, 0xfff); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Stloc_S, _methAddr); + _ilGen.Emit(OpCodes.Ldc_I4, 12); + _ilGen.Emit(OpCodes.Shr_Un); + _ilGen.Emit(OpCodes.Ldc_I4, 0x3f); + _ilGen.Emit(OpCodes.And); + _ilGen.Emit(OpCodes.Stloc_S, _methIncr); + } + + /// + /// Sends the value on the top of the IL evaluation stack to the GPU, + /// using the current method address. 
+ /// + private void EmitSend() + { + _ilGen.Emit(OpCodes.Ldarg_1); + _ilGen.Emit(OpCodes.Ldloc_S, _methAddr); + _ilGen.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send))); + _ilGen.Emit(OpCodes.Ldloc_S, _methAddr); + _ilGen.Emit(OpCodes.Ldloc_S, _methIncr); + _ilGen.Emit(OpCodes.Add); + _ilGen.Emit(OpCodes.Stloc_S, _methAddr); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs new file mode 100644 index 00000000..ba4e66eb --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs @@ -0,0 +1,57 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Gpu.State; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Represents a Macro Just-in-Time compiler execution context. + /// + class MacroJitContext + { + /// + /// Arguments FIFO. + /// + public Queue Fifo { get; } = new Queue(); + + /// + /// Fetches an argument from the arguments FIFO. + /// + /// + public int FetchParam() + { + if (!Fifo.TryDequeue(out int value)) + { + Logger.PrintWarning(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); + + return 0; + } + + return value; + } + + /// + /// Reads data from a GPU register. + /// + /// Current GPU state + /// Register offset to read + /// GPU register value + public static int Read(GpuState state, int reg) + { + return state.Read(reg); + } + + /// + /// Performs a GPU method call.
+ /// + /// Call argument + /// Current GPU state + /// Address, in words, of the method + public static void Send(int value, GpuState state, int methAddr) + { + MethodParams meth = new MethodParams(methAddr, value); + + state.CallMethod(meth); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs index 44b2b5e9..4d16628f 100644 --- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs +++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs @@ -27,5 +27,10 @@ namespace Ryujinx.Graphics.Gpu /// This can avoid lower resolution on some games when GPU performance is poor. /// public static bool FastGpuTime = true; + + /// + /// Enables or disables the Just-in-Time compiler for GPU Macro code. + /// + public static bool EnableMacroJit = true; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/State/GpuState.cs b/Ryujinx.Graphics.Gpu/State/GpuState.cs index 67bcb291..4b93dd45 100644 --- a/Ryujinx.Graphics.Gpu/State/GpuState.cs +++ b/Ryujinx.Graphics.Gpu/State/GpuState.cs @@ -32,6 +32,11 @@ namespace Ryujinx.Graphics.Gpu.State private readonly Register[] _registers; + /// + /// Gets or sets the shadow ram control used for this sub-channel. + /// + public ShadowRamControl ShadowRamControl { get; set; } + /// /// Creates a new instance of the GPU state. /// @@ -72,14 +77,15 @@ namespace Ryujinx.Graphics.Gpu.State /// Calls a GPU method, using this state. /// /// The GPU method to be called - /// Shadow RAM control register value - public void CallMethod(MethodParams meth, ShadowRamControl shadowCtrl) + public void CallMethod(MethodParams meth) { int value = meth.Argument; // Methods < 0x80 shouldn't be affected by shadow RAM at all. if (meth.Method >= 0x80) { + ShadowRamControl shadowCtrl = ShadowRamControl; + // TODO: Figure out what TrackWithFilter does, compared to Track. if (shadowCtrl == ShadowRamControl.Track || shadowCtrl == ShadowRamControl.TrackWithFilter)