mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2024-10-01 12:30:00 +02:00
Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused inst.s). Fix Vfma_V slow path not using StandardFPSCRValue(). (#1775)
* Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused inst.s). Add Vfma_S & Vfms_S Fma fast paths. Add Vfnma_S inst. with Fma/Sse fast paths and slow path. Add Vfnms_S Sse fast path. Add Tests for affected inst.s. Nits.
* InternalVersion = 1775
* Nits.
* Fix Vfma_V slow path not using StandardFPSCRValue().
* Nit: Fix Vfma_V order.
* Add Vfms_V Sse fast path and slow path.
* Add Vfma_V and Vfms_V Test.
This commit is contained in:
parent
b5c215111d
commit
8a33e884f8
13 changed files with 292 additions and 221 deletions
|
@ -274,17 +274,15 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
|
||||||
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
|
||||||
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||||
Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
|
||||||
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
|
||||||
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||||
Add(X86Instruction.Vfnmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vfnmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||||
|
Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||||
|
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
|
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
|
||||||
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
|
|
|
@ -440,9 +440,12 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
EnsureSameReg(dest, src1);
|
EnsureSameReg(dest, src1);
|
||||||
|
|
||||||
Debug.Assert(src3.GetRegister().Index == 0);
|
Debug.Assert(src3.GetRegister().Index == 0);
|
||||||
|
|
||||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
|
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,11 +477,16 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand src2 = operation.GetSource(1);
|
Operand src2 = operation.GetSource(1);
|
||||||
Operand src3 = operation.GetSource(2);
|
Operand src3 = operation.GetSource(2);
|
||||||
|
|
||||||
EnsureSameType(dest, src1, src2, src3);
|
|
||||||
EnsureSameReg(dest, src1);
|
|
||||||
Debug.Assert(!dest.Type.IsInteger());
|
|
||||||
Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
|
Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
|
||||||
|
|
||||||
|
Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register);
|
||||||
|
Debug.Assert(src3.Kind == OperandKind.Register || src3.Kind == OperandKind.Memory);
|
||||||
|
|
||||||
|
EnsureSameType(dest, src1, src2, src3);
|
||||||
|
Debug.Assert(dest.Type == OperandType.V128);
|
||||||
|
|
||||||
|
Debug.Assert(dest.Value == src1.Value);
|
||||||
|
|
||||||
context.Assembler.WriteInstruction(info.Inst, dest, src2, src3);
|
context.Assembler.WriteInstruction(info.Inst, dest, src2, src3);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -166,16 +166,14 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
|
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
|
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
|
||||||
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
|
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
|
||||||
Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
|
|
||||||
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfmsub231pd, new IntrinsicInfo(X86Instruction.Vfmsub231pd, IntrinsicType.Fma));
|
|
||||||
Add(Intrinsic.X86Vfmsub231ps, new IntrinsicInfo(X86Instruction.Vfmsub231ps, IntrinsicType.Fma));
|
|
||||||
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfnmsub231pd, new IntrinsicInfo(X86Instruction.Vfnmsub231pd, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfnmsub231ps, new IntrinsicInfo(X86Instruction.Vfnmsub231ps, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
|
||||||
|
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
|
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
|
||||||
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
|
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
|
||||||
|
|
|
@ -203,18 +203,16 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Vblendvps,
|
Vblendvps,
|
||||||
Vcvtph2ps,
|
Vcvtph2ps,
|
||||||
Vcvtps2ph,
|
Vcvtps2ph,
|
||||||
Vfmadd231pd,
|
|
||||||
Vfmadd231ps,
|
Vfmadd231ps,
|
||||||
Vfmadd231sd,
|
Vfmadd231sd,
|
||||||
Vfmadd231ss,
|
Vfmadd231ss,
|
||||||
Vfmsub231ps,
|
|
||||||
Vfmsub231pd,
|
|
||||||
Vfmsub231ss,
|
|
||||||
Vfmsub231sd,
|
Vfmsub231sd,
|
||||||
Vfnmsub231ps,
|
Vfmsub231ss,
|
||||||
Vfnmsub231pd,
|
Vfnmadd231ps,
|
||||||
Vfnmsub231ss,
|
Vfnmadd231sd,
|
||||||
|
Vfnmadd231ss,
|
||||||
Vfnmsub231sd,
|
Vfnmsub231sd,
|
||||||
|
Vfnmsub231ss,
|
||||||
Vpblendvb,
|
Vpblendvb,
|
||||||
Xor,
|
Xor,
|
||||||
Xorpd,
|
Xorpd,
|
||||||
|
|
|
@ -822,6 +822,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
|
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
|
||||||
SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
|
SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
|
||||||
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
|
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
|
||||||
|
SetA32("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create);
|
||||||
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
|
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
|
||||||
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
|
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
|
||||||
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);
|
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);
|
||||||
|
|
|
@ -591,7 +591,7 @@ namespace ARMeilleure.Instructions
|
||||||
EmitAluStore(context, res);
|
EmitAluStore(context, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void EmitDiv(ArmEmitterContext context, bool unsigned)
|
private static void EmitDiv(ArmEmitterContext context, bool unsigned)
|
||||||
{
|
{
|
||||||
Operand n = GetAluN(context);
|
Operand n = GetAluN(context);
|
||||||
Operand m = GetAluM(context);
|
Operand m = GetAluM(context);
|
||||||
|
|
|
@ -329,7 +329,7 @@ namespace ARMeilleure.Instructions
|
||||||
EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo);
|
EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void EmitMlal(ArmEmitterContext context, bool signed)
|
private static void EmitMlal(ArmEmitterContext context, bool signed)
|
||||||
{
|
{
|
||||||
OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp;
|
OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp;
|
||||||
|
|
||||||
|
|
|
@ -252,28 +252,14 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void Vfma_V(ArmEmitterContext context) // Fused.
|
public static void Vfma_S(ArmEmitterContext context) // Fused.
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseFma)
|
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||||
{
|
{
|
||||||
// Vectors contain elements that are 32-bits in length always. The only thing that will change is the number of elements in a vector.
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
|
||||||
// The 64-bit variant will never be used.
|
|
||||||
EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps, Intrinsic.X86Vfmadd231pd);
|
|
||||||
}
|
}
|
||||||
else
|
else if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
|
|
||||||
{
|
|
||||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void Vfma_S(ArmEmitterContext context) // Fused.
|
|
||||||
{
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
|
||||||
{
|
|
||||||
// TODO: Use FMA instruction set.
|
|
||||||
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -285,11 +271,29 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void Vfma_V(ArmEmitterContext context) // Fused.
|
||||||
|
{
|
||||||
|
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||||
|
{
|
||||||
|
EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
|
{
|
||||||
|
return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void Vfms_S(ArmEmitterContext context) // Fused.
|
public static void Vfms_S(ArmEmitterContext context) // Fused.
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||||
|
{
|
||||||
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
|
||||||
|
}
|
||||||
|
else if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
// TODO: Use FMA instruction set.
|
|
||||||
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -301,17 +305,36 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void Vfms_V(ArmEmitterContext context) // Fused.
|
||||||
|
{
|
||||||
|
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||||
|
{
|
||||||
|
EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
|
{
|
||||||
|
return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void Vfnma_S(ArmEmitterContext context) // Fused.
|
public static void Vfnma_S(ArmEmitterContext context) // Fused.
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseFma)
|
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
|
||||||
}
|
}
|
||||||
|
else if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
{
|
{
|
||||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), context.Negate(op1), context.Negate(op2), op3);
|
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -322,11 +345,15 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
|
||||||
}
|
}
|
||||||
|
else if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
{
|
{
|
||||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), context.Negate(op1), op2, op3);
|
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -422,36 +449,21 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
|
||||||
{
|
|
||||||
if ((op.Size & 1) == 0)
|
|
||||||
{
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Addss, d, res);
|
|
||||||
Operand mask = X86GetScalar(context, -0f);
|
|
||||||
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
|
|
||||||
Operand mask = X86GetScalar(context, -0d);
|
|
||||||
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
else if (Optimizations.FastFP)
|
else if (Optimizations.FastFP)
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
{
|
{
|
||||||
return context.Negate(context.Add(op1, context.Multiply(op2, op3)));
|
return context.Subtract(context.Negate(op1), context.Multiply(op2, op3));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
{
|
{
|
||||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
|
Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
|
||||||
|
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -462,24 +474,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
|
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
|
||||||
{
|
|
||||||
if ((op.Size & 1) == 0)
|
|
||||||
{
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
|
||||||
Operand mask = X86GetScalar(context, -0f);
|
|
||||||
d = context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
|
|
||||||
return context.AddIntrinsic(Intrinsic.X86Addss, d, res);
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
|
||||||
Operand mask = X86GetScalar(context, -0d);
|
|
||||||
d = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
|
|
||||||
return context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
else if (Optimizations.FastFP)
|
else if (Optimizations.FastFP)
|
||||||
{
|
{
|
||||||
|
@ -492,7 +487,8 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
||||||
{
|
{
|
||||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
|
Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
|
||||||
|
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -820,15 +820,15 @@ namespace ARMeilleure.Instructions
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
|
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
|
||||||
{
|
{
|
||||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||||
|
|
||||||
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
|
Debug.Assert((op.Size & 1) == 0);
|
||||||
|
|
||||||
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
|
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
|
||||||
{
|
{
|
||||||
return context.AddIntrinsic(inst, d, n, m);
|
return context.AddIntrinsic(inst32, d, n, m);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -927,7 +927,13 @@ namespace ARMeilleure.Instructions
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
|
public static void EmitScalarTernaryOpF32(
|
||||||
|
ArmEmitterContext context,
|
||||||
|
Intrinsic inst32pt1,
|
||||||
|
Intrinsic inst64pt1,
|
||||||
|
Intrinsic inst32pt2,
|
||||||
|
Intrinsic inst64pt2,
|
||||||
|
bool isNegD = false)
|
||||||
{
|
{
|
||||||
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
|
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
|
||||||
|
|
||||||
|
@ -939,6 +945,18 @@ namespace ARMeilleure.Instructions
|
||||||
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
|
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
|
||||||
{
|
{
|
||||||
Operand res = context.AddIntrinsic(inst1, n, m);
|
Operand res = context.AddIntrinsic(inst1, n, m);
|
||||||
|
|
||||||
|
if (isNegD)
|
||||||
|
{
|
||||||
|
Operand mask = doubleSize
|
||||||
|
? X86GetScalar(context, -0d)
|
||||||
|
: X86GetScalar(context, -0f);
|
||||||
|
|
||||||
|
d = doubleSize
|
||||||
|
? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
|
||||||
|
: context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
|
||||||
|
}
|
||||||
|
|
||||||
return context.AddIntrinsic(inst2, d, res);
|
return context.AddIntrinsic(inst2, d, res);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -155,16 +155,14 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||||
X86Unpcklps,
|
X86Unpcklps,
|
||||||
X86Vcvtph2ps,
|
X86Vcvtph2ps,
|
||||||
X86Vcvtps2ph,
|
X86Vcvtps2ph,
|
||||||
X86Vfmadd231pd,
|
|
||||||
X86Vfmadd231ps,
|
X86Vfmadd231ps,
|
||||||
X86Vfmadd231sd,
|
X86Vfmadd231sd,
|
||||||
X86Vfmadd231ss,
|
X86Vfmadd231ss,
|
||||||
X86Vfmsub231pd,
|
|
||||||
X86Vfmsub231ps,
|
|
||||||
X86Vfmsub231sd,
|
X86Vfmsub231sd,
|
||||||
X86Vfmsub231ss,
|
X86Vfmsub231ss,
|
||||||
X86Vfnmsub231pd,
|
X86Vfnmadd231ps,
|
||||||
X86Vfnmsub231ps,
|
X86Vfnmadd231sd,
|
||||||
|
X86Vfnmadd231ss,
|
||||||
X86Vfnmsub231sd,
|
X86Vfnmsub231sd,
|
||||||
X86Vfnmsub231ss,
|
X86Vfnmsub231ss,
|
||||||
X86Xorpd,
|
X86Xorpd,
|
||||||
|
|
|
@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
{
|
{
|
||||||
private const string HeaderMagic = "PTChd";
|
private const string HeaderMagic = "PTChd";
|
||||||
|
|
||||||
private const uint InternalVersion = 1713; //! To be incremented manually for each change to the ARMeilleure project.
|
private const int InternalVersion = 1775; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
|
|
@ -22,41 +22,45 @@ namespace Ryujinx.Tests.Cpu
|
||||||
0x80000000u, 0xFFFFFFFFu };
|
0x80000000u, 0xFFFFFFFFu };
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IEnumerable<uint> _1S_F_()
|
private static IEnumerable<ulong> _1S_F_()
|
||||||
{
|
{
|
||||||
yield return 0xFF7FFFFFu; // -Max Normal (float.MinValue)
|
yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue)
|
||||||
yield return 0x80800000u; // -Min Normal
|
yield return 0x0000000080800000ul; // -Min Normal
|
||||||
yield return 0x807FFFFFu; // -Max Subnormal
|
yield return 0x00000000807FFFFFul; // -Max Subnormal
|
||||||
yield return 0x80000001u; // -Min Subnormal (-float.Epsilon)
|
yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon)
|
||||||
yield return 0x7F7FFFFFu; // +Max Normal (float.MaxValue)
|
yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue)
|
||||||
yield return 0x00800000u; // +Min Normal
|
yield return 0x0000000000800000ul; // +Min Normal
|
||||||
yield return 0x007FFFFFu; // +Max Subnormal
|
yield return 0x00000000007FFFFFul; // +Max Subnormal
|
||||||
yield return 0x00000001u; // +Min Subnormal (float.Epsilon)
|
yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon)
|
||||||
|
|
||||||
if (!NoZeros)
|
if (!NoZeros)
|
||||||
{
|
{
|
||||||
yield return 0x80000000u; // -Zero
|
yield return 0x0000000080000000ul; // -Zero
|
||||||
yield return 0x00000000u; // +Zero
|
yield return 0x0000000000000000ul; // +Zero
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!NoInfs)
|
if (!NoInfs)
|
||||||
{
|
{
|
||||||
yield return 0xFF800000u; // -Infinity
|
yield return 0x00000000FF800000ul; // -Infinity
|
||||||
yield return 0x7F800000u; // +Infinity
|
yield return 0x000000007F800000ul; // +Infinity
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!NoNaNs)
|
if (!NoNaNs)
|
||||||
{
|
{
|
||||||
yield return 0xFFC00000u; // -QNaN (all zeros payload) (float.NaN)
|
yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
|
||||||
yield return 0xFFBFFFFFu; // -SNaN (all ones payload)
|
yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload)
|
||||||
yield return 0x7FC00000u; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
|
yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
|
||||||
yield return 0x7FBFFFFFu; // +SNaN (all ones payload)
|
yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int cnt = 1; cnt <= RndCnt; cnt++)
|
for (int cnt = 1; cnt <= RndCnt; cnt++)
|
||||||
{
|
{
|
||||||
yield return GenNormalS();
|
ulong grbg = TestContext.CurrentContext.Random.NextUInt();
|
||||||
yield return GenSubnormalS();
|
ulong rnd1 = GenNormalS();
|
||||||
|
ulong rnd2 = GenSubnormalS();
|
||||||
|
|
||||||
|
yield return (grbg << 32) | rnd1;
|
||||||
|
yield return (grbg << 32) | rnd2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,8 +97,11 @@ namespace Ryujinx.Tests.Cpu
|
||||||
|
|
||||||
for (int cnt = 1; cnt <= RndCnt; cnt++)
|
for (int cnt = 1; cnt <= RndCnt; cnt++)
|
||||||
{
|
{
|
||||||
yield return GenNormalD();
|
ulong rnd1 = GenNormalD();
|
||||||
yield return GenSubnormalD();
|
ulong rnd2 = GenSubnormalD();
|
||||||
|
|
||||||
|
yield return rnd1;
|
||||||
|
yield return rnd2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endregion
|
#endregion
|
||||||
|
@ -109,10 +116,10 @@ namespace Ryujinx.Tests.Cpu
|
||||||
[Test, Pairwise, Description("VCVT.<dt>.F32 <Sd>, <Sm>")]
|
[Test, Pairwise, Description("VCVT.<dt>.F32 <Sd>, <Sm>")]
|
||||||
public void Vcvt_F32_I32([Values(0u, 1u, 2u, 3u)] uint rd,
|
public void Vcvt_F32_I32([Values(0u, 1u, 2u, 3u)] uint rd,
|
||||||
[Values(0u, 1u, 2u, 3u)] uint rm,
|
[Values(0u, 1u, 2u, 3u)] uint rm,
|
||||||
[ValueSource(nameof(_1S_F_))] uint s0,
|
[ValueSource(nameof(_1S_F_))] ulong s0,
|
||||||
[ValueSource(nameof(_1S_F_))] uint s1,
|
[ValueSource(nameof(_1S_F_))] ulong s1,
|
||||||
[ValueSource(nameof(_1S_F_))] uint s2,
|
[ValueSource(nameof(_1S_F_))] ulong s2,
|
||||||
[ValueSource(nameof(_1S_F_))] uint s3,
|
[ValueSource(nameof(_1S_F_))] ulong s3,
|
||||||
[Values] bool unsigned) // <U32, S32>
|
[Values] bool unsigned) // <U32, S32>
|
||||||
{
|
{
|
||||||
uint opcode = 0xeebc0ac0u; // VCVT.U32.F32 S0, S0
|
uint opcode = 0xeebc0ac0u; // VCVT.U32.F32 S0, S0
|
||||||
|
@ -125,7 +132,7 @@ namespace Ryujinx.Tests.Cpu
|
||||||
opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22);
|
opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22);
|
||||||
opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5);
|
opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5);
|
||||||
|
|
||||||
V128 v0 = MakeVectorE0E1E2E3(s0, s1, s2, s3);
|
V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3);
|
||||||
|
|
||||||
SingleOpcode(opcode, v0: v0);
|
SingleOpcode(opcode, v0: v0);
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,59 @@ namespace Ryujinx.Tests.Cpu
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F32_()
|
||||||
|
{
|
||||||
|
return new uint[]
|
||||||
|
{
|
||||||
|
0xEEA00A00u, // VFMA. F32 S0, S0, S0
|
||||||
|
0xEEA00A40u, // VFMS. F32 S0, S0, S0
|
||||||
|
0xEE900A40u, // VFNMA.F32 S0, S0, S0
|
||||||
|
0xEE900A00u // VFNMS.F32 S0, S0, S0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F64_()
|
||||||
|
{
|
||||||
|
return new uint[]
|
||||||
|
{
|
||||||
|
0xEEA00B00u, // VFMA. F64 D0, D0, D0
|
||||||
|
0xEEA00B40u, // VFMS. F64 D0, D0, D0
|
||||||
|
0xEE900B40u, // VFNMA.F64 D0, D0, D0
|
||||||
|
0xEE900B00u // VFNMS.F64 D0, D0, D0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static uint[] _Vfma_Vfms_V_F32_()
|
||||||
|
{
|
||||||
|
return new uint[]
|
||||||
|
{
|
||||||
|
0xF2000C10u, // VFMA.F32 D0, D0, D0
|
||||||
|
0xF2200C10u // VFMS.F32 D0, D0, D0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F32_()
|
||||||
|
{
|
||||||
|
return new uint[]
|
||||||
|
{
|
||||||
|
0xEE000A00u, // VMLA. F32 S0, S0, S0
|
||||||
|
0xEE000A40u, // VMLS. F32 S0, S0, S0
|
||||||
|
0xEE100A40u, // VNMLA.F32 S0, S0, S0
|
||||||
|
0xEE100A00u // VNMLS.F32 S0, S0, S0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F64_()
|
||||||
|
{
|
||||||
|
return new uint[]
|
||||||
|
{
|
||||||
|
0xEE000B00u, // VMLA. F64 D0, D0, D0
|
||||||
|
0xEE000B40u, // VMLS. F64 D0, D0, D0
|
||||||
|
0xEE100B40u, // VNMLA.F64 D0, D0, D0
|
||||||
|
0xEE100B00u // VNMLS.F64 D0, D0, D0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
private static uint[] _Vp_Add_Max_Min_F_()
|
private static uint[] _Vp_Add_Max_Min_F_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
|
@ -184,8 +237,8 @@ namespace Ryujinx.Tests.Cpu
|
||||||
private const int RndCnt = 2;
|
private const int RndCnt = 2;
|
||||||
|
|
||||||
private static readonly bool NoZeros = false;
|
private static readonly bool NoZeros = false;
|
||||||
private static readonly bool NoInfs = true;
|
private static readonly bool NoInfs = false;
|
||||||
private static readonly bool NoNaNs = true;
|
private static readonly bool NoNaNs = false;
|
||||||
|
|
||||||
[Explicit]
|
[Explicit]
|
||||||
[Test, Pairwise, Description("VADD.f32 V0, V0, V0")]
|
[Test, Pairwise, Description("VADD.f32 V0, V0, V0")]
|
||||||
|
@ -293,119 +346,115 @@ namespace Ryujinx.Tests.Cpu
|
||||||
CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv);
|
CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv);
|
||||||
}
|
}
|
||||||
|
|
||||||
[Test, Pairwise, Description("VFMA.F<size> <Vd>, <Vn>, <Vm>")]
|
[Test, Pairwise] [Explicit] // Fused.
|
||||||
public void Vfma([Values(0u, 1u)] uint rd,
|
public void Vfma_Vfms_Vfnma_Vfnms_S_F32([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F32_))] uint opcode,
|
||||||
[Values(0u, 1u)] uint rn,
|
[Values(0u, 1u, 2u, 3u)] uint rd,
|
||||||
[Values(0u, 1u)] uint rm,
|
[Values(0u, 1u, 2u, 3u)] uint rn,
|
||||||
[Values(0u, 1u)] uint Q,
|
[Values(0u, 1u, 2u, 3u)] uint rm,
|
||||||
[ValueSource("_2S_F_")] ulong z,
|
[ValueSource(nameof(_1S_F_))] ulong s0,
|
||||||
[ValueSource("_2S_F_")] ulong a,
|
[ValueSource(nameof(_1S_F_))] ulong s1,
|
||||||
[ValueSource("_2S_F_")] ulong b )
|
[ValueSource(nameof(_1S_F_))] ulong s2,
|
||||||
|
[ValueSource(nameof(_1S_F_))] ulong s3)
|
||||||
{
|
{
|
||||||
uint opcode = 0xf2000c10;
|
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
|
||||||
|
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15);
|
||||||
V128 v0;
|
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
|
||||||
V128 v1;
|
|
||||||
V128 v2;
|
|
||||||
|
|
||||||
uint c = (uint) BitConverter.SingleToInt32Bits(z);
|
V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3);
|
||||||
uint d = (uint) BitConverter.SingleToInt32Bits(a);
|
|
||||||
uint e = (uint) BitConverter.SingleToInt32Bits(b);
|
|
||||||
if (Q == 0)
|
|
||||||
{
|
|
||||||
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
|
|
||||||
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
|
|
||||||
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
|
|
||||||
|
|
||||||
v0 = MakeVectorE0E1(c, c);
|
SingleOpcode(opcode, v0: v0);
|
||||||
v1 = MakeVectorE0E1(d, c);
|
|
||||||
v2 = MakeVectorE0E1(e, c);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
rd = rn = rm = 0; // Needed, as these values cannot be odd values if Q == 1.
|
|
||||||
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
|
||||||
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
|
||||||
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
|
||||||
|
|
||||||
v0 = MakeVectorE0E1E2E3(c, c, d, e);
|
|
||||||
v1 = MakeVectorE0E1E2E3(d, c, e, c);
|
|
||||||
v2 = MakeVectorE0E1E2E3(e, c, d, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
opcode |= ((Q & 1) << 6);
|
|
||||||
|
|
||||||
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
|
|
||||||
CompareAgainstUnicorn();
|
|
||||||
}
|
|
||||||
|
|
||||||
[Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")]
|
|
||||||
public void Vfnma([Values(0u, 1u)] uint rd,
|
|
||||||
[Values(0u, 1u)] uint rn,
|
|
||||||
[Values(0u, 1u)] uint rm,
|
|
||||||
[Values(2u, 3u)] uint size,
|
|
||||||
[ValueSource("_2S_F_")] ulong z,
|
|
||||||
[ValueSource("_2S_F_")] ulong a,
|
|
||||||
[ValueSource("_2S_F_")] ulong b)
|
|
||||||
{
|
|
||||||
uint opcode = 0xe900840;
|
|
||||||
|
|
||||||
if (size == 2)
|
|
||||||
{
|
|
||||||
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
|
|
||||||
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
|
|
||||||
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
|
||||||
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
|
||||||
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
opcode |= ((size & 3) << 8);
|
|
||||||
|
|
||||||
V128 v0 = MakeVectorE0E1(z, z);
|
|
||||||
V128 v1 = MakeVectorE0E1(a, z);
|
|
||||||
V128 v2 = MakeVectorE0E1(b, z);
|
|
||||||
|
|
||||||
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
|
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
[Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")]
|
[Test, Pairwise] [Explicit] // Fused.
|
||||||
public void Vfnms([Values(0u, 1u)] uint rd,
|
public void Vfma_Vfms_Vfnma_Vfnms_S_F64([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F64_))] uint opcode,
|
||||||
[Values(0u, 1u)] uint rn,
|
[Values(0u, 1u)] uint rd,
|
||||||
[Values(0u, 1u)] uint rm,
|
[Values(0u, 1u)] uint rn,
|
||||||
[Values(2u, 3u)] uint size,
|
[Values(0u, 1u)] uint rm,
|
||||||
[ValueSource("_2S_F_")] ulong z,
|
[ValueSource(nameof(_1D_F_))] ulong d0,
|
||||||
[ValueSource("_2S_F_")] ulong a,
|
[ValueSource(nameof(_1D_F_))] ulong d1)
|
||||||
[ValueSource("_2S_F_")] ulong b)
|
|
||||||
{
|
{
|
||||||
uint opcode = 0xee900a00;
|
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
||||||
|
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
||||||
|
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
||||||
|
|
||||||
if (size == 2)
|
V128 v0 = MakeVectorE0E1(d0, d1);
|
||||||
|
|
||||||
|
SingleOpcode(opcode, v0: v0);
|
||||||
|
|
||||||
|
CompareAgainstUnicorn();
|
||||||
|
}
|
||||||
|
|
||||||
|
[Test, Pairwise] [Explicit] // Fused.
|
||||||
|
public void Vfma_Vfms_V_F32([ValueSource(nameof(_Vfma_Vfms_V_F32_))] uint opcode,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rd,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rn,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rm,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d0,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d1,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d2,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d3,
|
||||||
|
[Values] bool q)
|
||||||
|
{
|
||||||
|
if (q)
|
||||||
{
|
{
|
||||||
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
|
opcode |= 1 << 6;
|
||||||
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
|
|
||||||
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
|
rd >>= 1; rd <<= 1;
|
||||||
|
rn >>= 1; rn <<= 1;
|
||||||
}
|
rm >>= 1; rm <<= 1;
|
||||||
else
|
|
||||||
{
|
|
||||||
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
|
||||||
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
|
||||||
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
opcode |= ((size & 3) << 8);
|
opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
|
||||||
|
opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
|
||||||
|
opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
|
||||||
|
|
||||||
V128 v0 = MakeVectorE0E1(z, z);
|
V128 v0 = MakeVectorE0E1(d0, d1);
|
||||||
V128 v1 = MakeVectorE0E1(a, z);
|
V128 v1 = MakeVectorE0E1(d2, d3);
|
||||||
V128 v2 = MakeVectorE0E1(b, z);
|
|
||||||
|
SingleOpcode(opcode, v0: v0, v1: v1);
|
||||||
|
|
||||||
|
CompareAgainstUnicorn();
|
||||||
|
}
|
||||||
|
|
||||||
|
[Test, Pairwise] [Explicit]
|
||||||
|
public void Vmla_Vmls_Vnmla_Vnmls_S_F32([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F32_))] uint opcode,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rd,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rn,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rm,
|
||||||
|
[ValueSource(nameof(_1S_F_))] ulong s0,
|
||||||
|
[ValueSource(nameof(_1S_F_))] ulong s1,
|
||||||
|
[ValueSource(nameof(_1S_F_))] ulong s2,
|
||||||
|
[ValueSource(nameof(_1S_F_))] ulong s3)
|
||||||
|
{
|
||||||
|
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
|
||||||
|
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15);
|
||||||
|
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
|
||||||
|
|
||||||
|
V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3);
|
||||||
|
|
||||||
|
SingleOpcode(opcode, v0: v0);
|
||||||
|
|
||||||
|
CompareAgainstUnicorn();
|
||||||
|
}
|
||||||
|
|
||||||
|
[Test, Pairwise] [Explicit]
|
||||||
|
public void Vmla_Vmls_Vnmla_Vnmls_S_F64([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F64_))] uint opcode,
|
||||||
|
[Values(0u, 1u)] uint rd,
|
||||||
|
[Values(0u, 1u)] uint rn,
|
||||||
|
[Values(0u, 1u)] uint rm,
|
||||||
|
[ValueSource(nameof(_1D_F_))] ulong d0,
|
||||||
|
[ValueSource(nameof(_1D_F_))] ulong d1)
|
||||||
|
{
|
||||||
|
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
||||||
|
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
||||||
|
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
||||||
|
|
||||||
|
V128 v0 = MakeVectorE0E1(d0, d1);
|
||||||
|
|
||||||
|
SingleOpcode(opcode, v0: v0);
|
||||||
|
|
||||||
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
|
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue