From 7f6b3d234a0dd82866e89930f05c520aca94946e Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 10 Jan 2022 12:08:00 -0300 Subject: [PATCH] Implement IMUL, PCNT and CONT shader instructions, fix FFMA32I and HFMA32I (#2972) * Implement IMUL shader instruction * Implement PCNT/CONT instruction and fix FFMA32I * Add HFMA232I to the table * Shader cache version bump * No Rc on Ffma32i --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 +- Ryujinx.Graphics.Shader/Decoders/Decoder.cs | 42 +++++++++++++---- .../Decoders/InstDecoders.cs | 2 - Ryujinx.Graphics.Shader/Decoders/InstTable.cs | 5 +- .../Instructions/InstEmit.cs | 44 +---------------- .../Instructions/InstEmitFloatArithmetic.cs | 6 +-- .../Instructions/InstEmitFlowControl.cs | 26 +++++++--- .../Instructions/InstEmitIntegerArithmetic.cs | 47 ++++++++++++++++++- .../Translation/Translator.cs | 2 +- 9 files changed, 108 insertions(+), 68 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index acd65621..4c6224e3 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 2764; + private const ulong ShaderCodeGenVersion = 2972; // Progress reporting helpers private volatile int _shaderCount; diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index 80d2cb4a..8820527f 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -95,7 +95,7 @@ namespace Ryujinx.Graphics.Shader.Decoders if (currBlock.OpCodes.Count != 0) { // We should have blocks for all possible branch targets, - // including those from SSY/PBK instructions. + // including those from PBK/PCNT/SSY instructions. foreach (PushOpInfo pushOp in currBlock.PushOpCodes) { GetBlock(pushOp.Op.GetAbsoluteAddress()); @@ -243,7 +243,7 @@ namespace Ryujinx.Graphics.Shader.Decoders { SetUserAttributeUses(config, op.Name, opCode); } - else if (op.Name == InstName.Ssy || op.Name == InstName.Pbk) + else if (op.Name == InstName.Pbk || op.Name == InstName.Pcnt || op.Name == InstName.Ssy) { block.AddPushOp(op); } @@ -512,8 +512,9 @@ namespace Ryujinx.Graphics.Shader.Decoders private enum MergeType { - Brk = 0, - Sync = 1 + Brk, + Cont, + Sync } private struct PathBlockState @@ -629,7 +630,7 @@ namespace Ryujinx.Graphics.Shader.Decoders for (int index = pushOpIndex; index < pushOpsCount; index++) { InstOp currentPushOp = current.PushOpCodes[index].Op; - MergeType pushMergeType = currentPushOp.Name == InstName.Ssy ? MergeType.Sync : MergeType.Brk; + MergeType pushMergeType = GetMergeTypeFromPush(currentPushOp.Name); branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType)); } } @@ -643,9 +644,9 @@ namespace Ryujinx.Graphics.Shader.Decoders } InstOp lastOp = current.GetLastOp(); - if (lastOp.Name == InstName.Sync || lastOp.Name == InstName.Brk) + if (IsPopBranch(lastOp.Name)) { - MergeType popMergeType = lastOp.Name == InstName.Sync ? MergeType.Sync : MergeType.Brk; + MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name); bool found = true; ulong targetAddress = 0UL; @@ -662,7 +663,7 @@ namespace Ryujinx.Graphics.Shader.Decoders (targetAddress, mergeType) = branchStack.Pop(); // Push the target address (this will be used to push the address - // back into the SSY/PBK stack when we return from that block), + // back into the PBK/PCNT/SSY stack when we return from that block), Push(new PathBlockState(targetAddress, mergeType)); } while (mergeType != popMergeType); @@ -705,5 +706,30 @@ namespace Ryujinx.Graphics.Shader.Decoders } } } + + public static bool IsPopBranch(InstName name) + { + return name == InstName.Brk || name == InstName.Cont || name == InstName.Sync; + } + + private static MergeType GetMergeTypeFromPush(InstName name) + { + return name switch + { + InstName.Pbk => MergeType.Brk, + InstName.Pcnt => MergeType.Cont, + _ => MergeType.Sync + }; + } + + private static MergeType GetMergeTypeFromPop(InstName name) + { + return name switch + { + InstName.Brk => MergeType.Brk, + InstName.Cont => MergeType.Cont, + _ => MergeType.Sync + }; + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs index 397e327e..b7a0caf1 100644 --- a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs +++ b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs @@ -1960,7 +1960,6 @@ namespace Ryujinx.Graphics.Shader.Decoders public int Dest => (int)((_opcode >> 0) & 0xFF); public int SrcA => (int)((_opcode >> 8) & 0xFF); public int Imm32 => (int)(_opcode >> 20); - public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; public bool NegC => (_opcode & 0x200000000000000) != 0; @@ -2460,7 +2459,6 @@ namespace Ryujinx.Graphics.Shader.Decoders public int Dest => (int)((_opcode >> 0) & 0xFF); public int SrcA => (int)((_opcode >> 8) & 0xFF); public int Imm => (int)(_opcode >> 20); - public int SrcC => (int)((_opcode >> 39) & 0xFF); public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; diff --git a/Ryujinx.Graphics.Shader/Decoders/InstTable.cs b/Ryujinx.Graphics.Shader/Decoders/InstTable.cs index 2d91f21a..eb3d6f3d 100644 --- a/Ryujinx.Graphics.Shader/Decoders/InstTable.cs +++ b/Ryujinx.Graphics.Shader/Decoders/InstTable.cs @@ -55,7 +55,7 @@ namespace Ryujinx.Graphics.Shader.Decoders Add("1110111110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctll, InstEmit.Cctll, InstProps.Ra); Add("1110101111110xx0000000000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt); Add("1110101111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt, InstProps.Rc); - Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont, InstEmit.Cont); + Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont, InstEmit.Cont, InstProps.Bra); Add("0101000010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cset, InstEmit.Cset, InstProps.Rd | InstProps.Ps); Add("0101000010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Csetp, InstEmit.Csetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps); Add("0101000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cs2r, InstEmit.Cs2r, InstProps.Rd); @@ -101,7 +101,7 @@ namespace Ryujinx.Graphics.Shader.Decoders Add("0011001x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); Add("010010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaC, InstProps.Rd | InstProps.Ra | InstProps.Rc); Add("010100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); - Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i, InstEmit.Ffma32i, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i, InstEmit.Ffma32i, InstProps.Rd | InstProps.Ra); Add("0101110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloR, InstProps.Rd | InstProps.Rb); Add("0011100x00110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloI, InstProps.Rd | InstProps.Ib); Add("0100110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloC, InstProps.Rd); @@ -129,6 +129,7 @@ namespace Ryujinx.Graphics.Shader.Decoders Add("01110xxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); Add("01110xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2C, InstProps.Rd | InstProps.Ra | InstProps.Rc); Add("01100xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2Rc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("0010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma232i, InstProps.Rd | InstProps.Ra); Add("0101110100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2R, InstProps.Rd | InstProps.Ra | InstProps.Rb); Add("0111100x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2I, InstProps.Rd | InstProps.Ra | InstProps.Ib); Add("0111100x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2C, InstProps.Rd | InstProps.Ra); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs index b0cb7028..c242963a 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs @@ -54,18 +54,11 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented."); } - public static void Cont(EmitterContext context) - { - InstCont op = context.GetOp(); - - context.Config.GpuAccessor.Log("Shader instruction ContUnsup is not implemented."); - } - public static void Cset(EmitterContext context) { InstCset op = context.GetOp(); - context.Config.GpuAccessor.Log("Shader instruction CsetUnsup is not implemented."); + context.Config.GpuAccessor.Log("Shader instruction Cset is not implemented."); } public static void Cs2r(EmitterContext context) @@ -159,34 +152,6 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented."); } - public static void ImulR(EmitterContext context) - { - InstImulR op = context.GetOp(); - - context.Config.GpuAccessor.Log("Shader instruction ImulR is not implemented."); - } - - public static void ImulI(EmitterContext context) - { - InstImulI op = context.GetOp(); - - context.Config.GpuAccessor.Log("Shader instruction ImulI is not implemented."); - } - - public static void ImulC(EmitterContext context) - { - InstImulC op = context.GetOp(); - - context.Config.GpuAccessor.Log("Shader instruction ImulC is not implemented."); - } - - public static void Imul32i(EmitterContext context) - { - InstImul32i op = context.GetOp(); - - context.Config.GpuAccessor.Log("Shader instruction Imul32i is not implemented."); - } - public static void Jcal(EmitterContext context) { InstJcal op = context.GetOp(); @@ -250,13 +215,6 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented."); } - public static void Pcnt(EmitterContext context) - { - InstPcnt op = context.GetOp(); - - context.Config.GpuAccessor.Log("Shader instruction Pcnt is not implemented."); - } - public static void Pexit(EmitterContext context) { InstPexit op = context.GetOp(); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs index 11d724c4..29803c31 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs @@ -204,7 +204,7 @@ namespace Ryujinx.Graphics.Shader.Instructions var srcA = GetSrcReg(context, op.SrcA); var srcB = GetSrcImm(context, op.Imm32); - var srcC = GetSrcReg(context, op.SrcC); + var srcC = GetSrcReg(context, op.Dest); EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC); } @@ -333,13 +333,13 @@ namespace Ryujinx.Graphics.Shader.Instructions EmitHfma2(context, op.OFmt, srcA, srcB, srcC, op.Dest, op.Sat); } - public static void Hfma232iI(EmitterContext context) + public static void Hfma232i(EmitterContext context) { InstHfma232i op = context.GetOp(); var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false); var srcB = GetHalfSrc(context, op.Imm); - var srcC = GetHalfSrc(context, HalfSwizzle.F16, op.SrcC, op.NegC, false); + var srcC = GetHalfSrc(context, HalfSwizzle.F16, op.Dest, op.NegC, false); EmitHfma2(context, OFmt.F16, srcA, srcB, srcC, op.Dest, saturate: false); } diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs index da34c1be..3cb8fe72 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstBrk op = context.GetOp(); - EmitBrkOrSync(context); + EmitBrkContSync(context); } public static void Brx(EmitterContext context) @@ -87,6 +87,13 @@ namespace Ryujinx.Graphics.Shader.Instructions } } + public static void Cont(EmitterContext context) + { + InstCont op = context.GetOp(); + + EmitBrkContSync(context); + } + public static void Exit(EmitterContext context) { InstExit op = context.GetOp(); @@ -116,7 +123,14 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstPbk op = context.GetOp(); - EmitPbkOrSsy(context); + EmitPbkPcntSsy(context); + } + + public static void Pcnt(EmitterContext context) + { + InstPcnt op = context.GetOp(); + + EmitPbkPcntSsy(context); } public static void Ret(EmitterContext context) @@ -137,17 +151,17 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstSsy op = context.GetOp(); - EmitPbkOrSsy(context); + EmitPbkPcntSsy(context); } public static void Sync(EmitterContext context) { InstSync op = context.GetOp(); - EmitBrkOrSync(context); + EmitBrkContSync(context); } - private static void EmitPbkOrSsy(EmitterContext context) + private static void EmitPbkPcntSsy(EmitterContext context) { var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address).Consumers; @@ -162,7 +176,7 @@ namespace Ryujinx.Graphics.Shader.Instructions } } - private static void EmitBrkOrSync(EmitterContext context) + private static void EmitBrkContSync(EmitterContext context) { var targets = context.CurrBlock.SyncTargets; diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs index ac8cca1b..374e3d61 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs @@ -138,6 +138,46 @@ namespace Ryujinx.Graphics.Shader.Instructions EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo); } + public static void ImulR(EmitterContext context) + { + InstImulR op = context.GetOp(); + + var srcA = GetSrcReg(context, op.SrcA); + var srcB = GetSrcReg(context, op.SrcB); + + EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo); + } + + public static void ImulI(EmitterContext context) + { + InstImulI op = context.GetOp(); + + var srcA = GetSrcReg(context, op.SrcA); + var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20)); + + EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo); + } + + public static void ImulC(EmitterContext context) + { + InstImulC op = context.GetOp(); + + var srcA = GetSrcReg(context, op.SrcA); + var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset); + + EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo); + } + + public static void Imul32i(EmitterContext context) + { + InstImul32i op = context.GetOp(); + + var srcA = GetSrcReg(context, op.SrcA); + var srcB = GetSrcImm(context, op.Imm32); + + EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo); + } + public static void IscaddR(EmitterContext context) { InstIscaddR op = context.GetOp(); @@ -366,7 +406,7 @@ namespace Ryujinx.Graphics.Shader.Instructions // TODO: CC, X, corner cases. } - public static void EmitImad( + private static void EmitImad( EmitterContext context, Operand srcA, Operand srcB, @@ -407,7 +447,10 @@ namespace Ryujinx.Graphics.Shader.Instructions res = context.IMultiply(srcA, srcB); } - res = context.IAdd(res, srcC); + if (srcC.Type != OperandType.Constant || srcC.Value != 0) + { + res = context.IAdd(res, srcC); + } // TODO: CC, X, SAT, and more? diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index cef25350..709b16db 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -295,7 +295,7 @@ namespace Ryujinx.Graphics.Shader.Translation Operand predSkipLbl = null; - if (op.Name == InstName.Sync || op.Name == InstName.Brk) + if (Decoder.IsPopBranch(op.Name)) { // If the instruction is a SYNC or BRK instruction with only one // possible target address, then the instruction is basically