From 55374ebba0ed49bc4624e47cc971b1e63f644583 Mon Sep 17 00:00:00 2001
From: gdkchan
Date: Tue, 14 Aug 2018 23:54:12 -0300
Subject: [PATCH] Zero out bits 63:32 of scalar float operations with SSE intrinsics (#273)

---
Reviewer notes (not part of the commit message):

* Formatting: this patch text was received with its line breaks collapsed
  and with angle-bracketed text missing. The line breaks, indentation, blank
  context lines and generic type arguments (Vector128<float>,
  Vector128<sbyte>, Sse.StaticCast<uint, float>) below are a reconstruction.
  Blank lines were placed so that every hunk matches its @@ line counts and
  the diffstat. Compare against the upstream commit before applying; if the
  whitespace differs, `git apply --ignore-whitespace` may be needed.

* What the change does: call sites now pick EmitScalarSseOrSse2CallF or
  EmitVectorSseOrSse2CallF, which forward a new `Scalar` flag to
  EmitSseOrSse2CallF. For scalar operations the destination register is
  cleaned after the store: when SizeF == 0 it is ANDed with a mask whose
  only non-zero element is the lowest 32-bit one (VectorZero32_128),
  otherwise EmitVectorZeroUpper is used.

* NOTE(review): the new static constructor of AVectorHelper throws
  PlatformNotSupportedException when Sse2 is not supported. An exception
  leaving a static constructor is surfaced as TypeInitializationException on
  every later use of the type, so members that do not use SSE at all (for
  example EmitCall) would fail too. Consider building the mask only when
  Sse2.IsSupported instead of throwing.

* NOTE(review): VectorZero32_128 checks Sse.IsSupported, while the mask it
  uses is created with Sse2.SetVector128. The two checks should agree.

 .../Instruction/AInstEmitSimdArithmetic.cs    | 16 ++++-----
 ChocolArm64/Instruction/AInstEmitSimdCmp.cs   | 12 +++----
 .../Instruction/AInstEmitSimdHelper.cs        | 34 +++++++++++++++++--
 ChocolArm64/Instruction/AVectorHelper.cs      | 23 +++++++++++++
 4 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index 1d7b16dd..92da9ff9 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -305,7 +305,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.AddScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.AddScalar));
             }
             else
             {
@@ -317,7 +317,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.Add));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.Add));
             }
             else
             {
@@ -375,7 +375,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.DivideScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.DivideScalar));
             }
             else
             {
@@ -387,7 +387,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.Divide));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.Divide));
             }
             else
             {
@@ -526,7 +526,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar));
             }
             else
             {
@@ -543,7 +543,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.Multiply));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.Multiply));
             }
             else
             {
@@ -910,7 +910,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.SubtractScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.SubtractScalar));
             }
             else
             {
@@ -922,7 +922,7 @@ namespace ChocolArm64.Instruction
         {
             if (AOptimizations.UseSse && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.Subtract));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.Subtract));
             }
             else
             {
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
index c2d47747..6357396d 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
@@ -158,7 +158,7 @@ namespace ChocolArm64.Instruction
             if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
                                                  && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar));
             }
             else
             {
@@ -171,7 +171,7 @@ namespace ChocolArm64.Instruction
             if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
                                                  && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqual));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareEqual));
             }
             else
             {
@@ -184,7 +184,7 @@ namespace ChocolArm64.Instruction
             if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
                                                  && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
             }
             else
             {
@@ -197,7 +197,7 @@ namespace ChocolArm64.Instruction
             if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
                                                  && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual));
             }
             else
             {
@@ -210,7 +210,7 @@ namespace ChocolArm64.Instruction
             if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
                                                  && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar));
+                EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar));
             }
             else
             {
@@ -223,7 +223,7 @@ namespace ChocolArm64.Instruction
             if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
                                                  && AOptimizations.UseSse2)
             {
-                EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan));
+                EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan));
             }
             else
             {
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index a9af3902..4ecfdae3 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -110,7 +110,17 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name)
+        public static void EmitScalarSseOrSse2CallF(AILEmitterCtx Context, string Name)
+        {
+            EmitSseOrSse2CallF(Context, Name, true);
+        }
+
+        public static void EmitVectorSseOrSse2CallF(AILEmitterCtx Context, string Name)
+        {
+            EmitSseOrSse2CallF(Context, Name, false);
+        }
+
+        public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name, bool Scalar)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
@@ -160,7 +170,18 @@ namespace ChocolArm64.Instruction
 
             Context.EmitStvec(Op.Rd);
 
-            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            if (Scalar)
+            {
+                if (SizeF == 0)
+                {
+                    EmitVectorZero32_128(Context, Op.Rd);
+                }
+                else /* if (SizeF == 1) */
+                {
+                    EmitVectorZeroUpper(Context, Op.Rd);
+                }
+            }
+            else if (Op.RegisterSize == ARegisterSize.SIMD64)
             {
                 EmitVectorZeroUpper(Context, Op.Rd);
             }
@@ -1238,6 +1259,15 @@ namespace ChocolArm64.Instruction
             EmitVectorInsert(Context, Rd, 1, 3, 0);
         }
 
+        public static void EmitVectorZero32_128(AILEmitterCtx Context, int Reg)
+        {
+            Context.EmitLdvec(Reg);
+
+            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorZero32_128));
+
+            Context.EmitStvec(Reg);
+        }
+
         public static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size)
         {
             ThrowIfInvalid(Index, Size);
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs
index b2d53740..3e4452ab 100644
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@@ -9,6 +9,18 @@ namespace ChocolArm64.Instruction
 {
     static class AVectorHelper
     {
+        private static readonly Vector128<float> Zero32_128Mask;
+
+        static AVectorHelper()
+        {
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            Zero32_128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff));
+        }
+
         public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
         {
             bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
@@ -448,6 +460,17 @@ namespace ChocolArm64.Instruction
             throw new PlatformNotSupportedException();
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorZero32_128(Vector128<float> Vector)
+        {
+            if (Sse.IsSupported)
+            {
+                return Sse.And(Vector, Zero32_128Mask);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
         {