mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2024-10-01 12:30:00 +02:00
Lower precision of estimate instruction results to match Arm behavior (#1943)
* Lower precision of estimate instruction results to match Arm behavior
* PTC version update
* Nits
This commit is contained in:
parent
98d0240ce6
commit
dcce407071
2 changed files with 66 additions and 19 deletions
|
@ -1475,9 +1475,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
int sizeF = op.Size & 1;
|
int sizeF = op.Size & 1;
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||||
{
|
{
|
||||||
EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
|
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1494,9 +1496,16 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
int sizeF = op.Size & 1;
|
int sizeF = op.Size & 1;
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||||
{
|
{
|
||||||
EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
|
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
|
||||||
|
|
||||||
|
if (op.RegisterSize == RegisterSize.Simd64)
|
||||||
|
{
|
||||||
|
res = context.VectorZeroUpper64(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1652,7 +1661,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1667,7 +1676,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1682,7 +1691,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
|
EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1697,7 +1706,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
|
EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1712,7 +1721,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1727,7 +1736,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1778,7 +1787,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
|
EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1793,7 +1802,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.UseSse41)
|
if (Optimizations.UseSse41)
|
||||||
{
|
{
|
||||||
EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
|
EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1810,9 +1819,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
int sizeF = op.Size & 1;
|
int sizeF = op.Size & 1;
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||||
{
|
{
|
||||||
EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
|
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1829,9 +1840,16 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
int sizeF = op.Size & 1;
|
int sizeF = op.Size & 1;
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||||
{
|
{
|
||||||
EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
|
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
|
||||||
|
|
||||||
|
if (op.RegisterSize == RegisterSize.Simd64)
|
||||||
|
{
|
||||||
|
res = context.VectorZeroUpper64(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3498,7 +3516,7 @@ namespace ARMeilleure.Instructions
|
||||||
return context.ConditionalSelect(cmp, op1, op2);
|
return context.ConditionalSelect(cmp, op1, op2);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||||
{
|
{
|
||||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -3520,7 +3538,7 @@ namespace ARMeilleure.Instructions
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||||
{
|
{
|
||||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -3538,6 +3556,35 @@ namespace ARMeilleure.Instructions
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Lowers the precision of FP32 estimate results (e.g. the output of RCPSS/RCPPS or
/// RSQRTSS/RSQRTPS) by rounding away the low 15 bits of each 32-bit element, so that the
/// host estimate matches the reduced precision of the Arm estimate instructions.
/// Elements whose exponent field is all ones (NaN or infinity) are passed through untouched.
/// </summary>
/// <param name="context">Emitter context the intrinsics are appended to.</param>
/// <param name="value">Vector holding the FP32 estimate result(s) to be rounded.</param>
/// <param name="scalar">
/// True to build the masks with <c>X86GetScalar</c> (scalar form),
/// false to build them with <c>X86GetAllElements</c> (vector form).
/// </param>
/// <returns>The rounded vector, with NaN/infinity elements taken from the original <paramref name="value"/>.</returns>
private static Operand EmitSse41FP32RoundExp8(ArmEmitterContext context, Operand value, bool scalar)
{
    // All three masks are 32-bit patterns applied to the raw bits of each FP32 element:
    //   roundMask (0x4000)     - half of the lowest bit that survives truncation (round to nearest).
    //   truncMask (0xFFFF8000) - clears the low 15 bits.
    //   expMask   (0x7F800000) - the FP32 exponent field.
    Operand roundMask;
    Operand truncMask;
    Operand expMask;

    if (scalar)
    {
        roundMask = X86GetScalar(context, 0x4000);
        truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000));
        expMask   = X86GetScalar(context, 0x7F800000);
    }
    else
    {
        roundMask = X86GetAllElements(context, 0x4000);
        truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000));
        expMask   = X86GetAllElements(context, 0x7F800000);
    }

    // Keep the unrounded input around: NaN/infinity elements must not be altered.
    Operand oValue = value;

    // An element is NaN or infinity when its exponent field is all ones.
    // The comparison is done on 32-bit lanes, matching the width of the elements being tested.
    Operand masked   = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask);
    Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask);

    // The rounding increment must be added as a 32-bit integer so that a carry out of
    // bit 15 propagates into bit 16 and above. A 16-bit lane add (PADDW) would drop that
    // carry and wrap the low half to zero, rounding the value down instead of up.
    value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask);
    value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask);

    // Select the original element wherever isNaNInf is set, the rounded element otherwise.
    return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf);
}
|
||||||
|
|
||||||
public static void EmitSse2VectorIsNaNOpF(
|
public static void EmitSse2VectorIsNaNOpF(
|
||||||
ArmEmitterContext context,
|
ArmEmitterContext context,
|
||||||
Operand opF,
|
Operand opF,
|
||||||
|
|
|
@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
{
|
{
|
||||||
private const string HeaderMagic = "PTChd";
|
private const string HeaderMagic = "PTChd";
|
||||||
|
|
||||||
private const int InternalVersion = 1956; //! To be incremented manually for each change to the ARMeilleure project.
|
private const int InternalVersion = 1943; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
|
Loading…
Reference in a new issue