using System;
using System.Buffers.Binary;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Ryujinx.Common
{
    public static class XXHash128
    {
        // Number of input bytes consumed by one accumulation step (one "stripe").
        private const int StripeLen = 64;
        // Number of 64-bit accumulators: one per 8 bytes of a stripe.
        private const int AccNb = StripeLen / sizeof(ulong);
        // Number of bytes the secret window advances for each successive stripe.
        private const int SecretConsumeRate = 8;
        // Extra backwards offset (from secret end - StripeLen) used for the final stripe of a long input.
        private const int SecretLastAccStart = 7;
        // Secret offset used when merging accumulators: from the start for the low half,
        // and mirrored from the end for the high half.
        private const int SecretMergeAccsStart = 11;
        // Minimum secret length asserted by the hashing routines.
        private const int SecretSizeMin = 136;
        // Secret offsets used by the 129..240 byte path: start offset for rounds past the
        // fourth one, and the offset (relative to SecretSizeMin) for the trailing 32 bytes.
        private const int MidSizeStartOffset = 3;
        private const int MidSizeLastOffset = 17;

        // 32-bit mixing constants (multipliers and initial accumulator values).
        private const uint Prime32_1 = 0x9E3779B1U;
        private const uint Prime32_2 = 0x85EBCA77U;
        private const uint Prime32_3 = 0xC2B2AE3DU;
        private const uint Prime32_4 = 0x27D4EB2FU;
        private const uint Prime32_5 = 0x165667B1U;

        // 64-bit mixing constants (multipliers and initial accumulator values).
        private const ulong Prime64_1 = 0x9E3779B185EBCA87UL;
        private const ulong Prime64_2 = 0xC2B2AE3D27D4EB4FUL;
        private const ulong Prime64_3 = 0x165667B19E3779F9UL;
        private const ulong Prime64_4 = 0x85EBCA77C2B2AE63UL;
        private const ulong Prime64_5 = 0x27D4EB2F165667C5UL;

        // Initial values of the AccNb accumulators for the long-input path.
        // Copied into a fresh stack buffer for every hash, never mutated in place.
        private static readonly ulong[] Xxh3InitAcc = new ulong[]
        {
            Prime32_3,
            Prime64_1,
            Prime64_2,
            Prime64_3,
            Prime64_4,
            Prime32_2,
            Prime64_5,
            Prime32_1
        };

        // Default 192-byte secret passed to the hashing routines by ComputeHash.
        // The exact byte values determine the hash output; do not edit.
        private static readonly byte[] Xxh3KSecret = new byte[]
        {
            0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
            0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
            0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
            0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
            0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
            0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
            0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
            0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
            0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
            0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
            0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
            0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e
        };

        /// <summary>
        /// Multiplies the low 32 bits of <paramref name="x"/> by the low 32 bits of
        /// <paramref name="y"/>; the upper halves of both operands are ignored.
        /// </summary>
        /// <returns>The full 64-bit product of the two 32-bit values.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong Mult32To64(ulong x, ulong y)
        {
            ulong lowX = x & uint.MaxValue;
            ulong lowY = y & uint.MaxValue;

            // Both factors fit in 32 bits, so the product cannot overflow 64 bits.
            return lowX * lowY;
        }

        /// <summary>
        /// Computes the full 128-bit product of two 64-bit values.
        /// </summary>
        /// <returns>
        /// A <see cref="Hash128"/> whose <c>Low</c> holds the lower 64 bits of the product
        /// and whose <c>High</c> holds the upper 64 bits.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Hash128 Mult64To128(ulong lhs, ulong rhs)
        {
            ulong upper = Math.BigMul(lhs, rhs, out ulong lower);

            return new Hash128
            {
                Low = lower,
                High = upper
            };
        }

        /// <summary>
        /// Multiplies two 64-bit values into a 128-bit product and folds it back to
        /// 64 bits by XORing the product's two halves together.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong Mul128Fold64(ulong lhs, ulong rhs)
        {
            Hash128 wide = Mult64To128(lhs, rhs);

            ulong folded = wide.Low;
            folded ^= wide.High;
            return folded;
        }

        /// <summary>
        /// XORs a value with a copy of itself logically shifted right by <paramref name="shift"/> bits.
        /// </summary>
        /// <param name="v64">Value to mix.</param>
        /// <param name="shift">Shift distance; asserted to be in the range [0, 63].</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong XorShift64(ulong v64, int shift)
        {
            Debug.Assert(shift >= 0 && shift < 64);

            ulong shifted = v64 >> shift;
            return shifted ^ v64;
        }

        /// <summary>
        /// Final bit-mixing step used by the XXH3 paths: xorshift by 37, multiply by a
        /// fixed 64-bit constant, then xorshift by 32.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong Xxh3Avalanche(ulong h64)
        {
            const ulong Multiplier = 0x165667919E3779F9UL;

            ulong mixed = XorShift64(h64, 37) * Multiplier;
            return XorShift64(mixed, 32);
        }

        /// <summary>
        /// Bit-mixing step used by the shortest input paths: alternating xorshifts
        /// (33, 29, 32) and multiplications by <c>Prime64_2</c> and <c>Prime64_3</c>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong Xxh64Avalanche(ulong h64)
        {
            ulong mixed = (h64 ^ (h64 >> 33)) * Prime64_2;
            mixed = (mixed ^ (mixed >> 29)) * Prime64_3;
            return mixed ^ (mixed >> 32);
        }

        /// <summary>
        /// Folds one 64-byte stripe of <paramref name="input"/> into the eight 64-bit accumulators,
        /// keyed by the first 64 bytes of <paramref name="secret"/>.
        /// </summary>
        /// <remarks>
        /// Three equivalent implementations are selected at runtime: AVX2, SSE2 and a scalar fallback.
        /// The SIMD paths access <paramref name="acc"/>, <paramref name="input"/> and
        /// <paramref name="secret"/> through raw pointers without bounds checks, so callers must pass
        /// at least 8 accumulators and 64 bytes of input and secret.
        /// </remarks>
        /// <param name="acc">Accumulators, updated in place.</param>
        /// <param name="input">Stripe data; the first 64 bytes are read.</param>
        /// <param name="secret">Secret window for this stripe; the first 64 bytes are read.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private unsafe static void Xxh3Accumulate512(Span<ulong> acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret)
        {
            if (Avx2.IsSupported)
            {
                fixed (ulong* pAcc = acc)
                {
                    fixed (byte* pInput = input, pSecret = secret)
                    {
                        Vector256<ulong>* xAcc = (Vector256<ulong>*)pAcc;
                        Vector256<byte>* xInput = (Vector256<byte>*)pInput;
                        Vector256<byte>* xSecret = (Vector256<byte>*)pSecret;

                        // Two 32-byte iterations cover the whole stripe (four accumulators each).
                        for (ulong i = 0; i < StripeLen / 32; i++)
                        {
                            Vector256<byte> dataVec = xInput[i];
                            Vector256<byte> keyVec = xSecret[i];
                            Vector256<byte> dataKey = Avx2.Xor(dataVec, keyVec);
                            // Bring the upper 32 bits of each 64-bit lane down into the lower position
                            // so the multiply below computes low32 * high32 of the keyed data
                            // (same as the scalar path's Mult32To64((uint)dataKey, dataKey >> 32)).
                            Vector256<uint> dataKeyLo = Avx2.Shuffle(dataKey.AsUInt32(), 0b00110001);
                            Vector256<ulong> product = Avx2.Multiply(dataKey.AsUInt32(), dataKeyLo);
                            // Swap adjacent 64-bit lanes of the raw input so each accumulator receives its
                            // neighbour's data (same as the scalar path's acc[i ^ 1] += dataVal).
                            Vector256<uint> dataSwap = Avx2.Shuffle(dataVec.AsUInt32(), 0b01001110);
                            Vector256<ulong> sum = Avx2.Add(xAcc[i], dataSwap.AsUInt64());
                            xAcc[i] = Avx2.Add(product, sum);
                        }
                    }
                }
            }
            else if (Sse2.IsSupported)
            {
                fixed (ulong* pAcc = acc)
                {
                    fixed (byte* pInput = input, pSecret = secret)
                    {
                        Vector128<ulong>* xAcc = (Vector128<ulong>*)pAcc;
                        Vector128<byte>* xInput = (Vector128<byte>*)pInput;
                        Vector128<byte>* xSecret = (Vector128<byte>*)pSecret;

                        // Same computation as the AVX2 path, 16 bytes (two accumulators) at a time.
                        for (ulong i = 0; i < StripeLen / 16; i++)
                        {
                            Vector128<byte> dataVec = xInput[i];
                            Vector128<byte> keyVec = xSecret[i];
                            Vector128<byte> dataKey = Sse2.Xor(dataVec, keyVec);
                            Vector128<uint> dataKeyLo = Sse2.Shuffle(dataKey.AsUInt32(), 0b00110001);
                            Vector128<ulong> product = Sse2.Multiply(dataKey.AsUInt32(), dataKeyLo);
                            Vector128<uint> dataSwap = Sse2.Shuffle(dataVec.AsUInt32(), 0b01001110);
                            Vector128<ulong> sum = Sse2.Add(xAcc[i], dataSwap.AsUInt64());
                            xAcc[i] = Sse2.Add(product, sum);
                        }
                    }
                }
            }
            else
            {
                // Scalar fallback: one 64-bit lane per iteration, with span bounds checks.
                for (int i = 0; i < AccNb; i++)
                {
                    ulong dataVal = BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(i * sizeof(ulong)));
                    ulong dataKey = dataVal ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(i * sizeof(ulong)));
                    // The raw input goes to the neighbouring accumulator of the pair...
                    acc[i ^ 1] += dataVal;
                    // ...while this accumulator receives low32 * high32 of the keyed input.
                    acc[i] += Mult32To64((uint)dataKey, dataKey >> 32);
                }
            }
        }

        /// <summary>
        /// Scrambles the eight accumulators in place. Each accumulator becomes
        /// <c>((acc ^ (acc &gt;&gt; 47)) ^ key) * Prime32_1</c>, where <c>key</c> is the matching
        /// little-endian 64-bit word of <paramref name="secret"/>.
        /// </summary>
        /// <remarks>
        /// AVX2, SSE2 and scalar implementations are selected at runtime. The SIMD paths access
        /// <paramref name="acc"/> and <paramref name="secret"/> through raw pointers without bounds
        /// checks, so callers must pass at least 8 accumulators and 64 bytes of secret.
        /// </remarks>
        /// <param name="acc">Accumulators, updated in place.</param>
        /// <param name="secret">Secret window; the first 64 bytes are read.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private unsafe static void Xxh3ScrambleAcc(Span<ulong> acc, ReadOnlySpan<byte> secret)
        {
            if (Avx2.IsSupported)
            {
                fixed (ulong* pAcc = acc)
                {
                    fixed (byte* pSecret = secret)
                    {
                        Vector256<uint> prime32 = Vector256.Create(Prime32_1);
                        Vector256<ulong>* xAcc = (Vector256<ulong>*)pAcc;
                        Vector256<byte>* xSecret = (Vector256<byte>*)pSecret;

                        for (ulong i = 0; i < StripeLen / 32; i++)
                        {
                            // acc ^ (acc >> 47)
                            Vector256<ulong> accVec = xAcc[i];
                            Vector256<ulong> shifted = Avx2.ShiftRightLogical(accVec, 47);
                            Vector256<ulong> dataVec = Avx2.Xor(accVec, shifted);

                            // ... ^ key
                            Vector256<byte> keyVec = xSecret[i];
                            Vector256<uint> dataKey = Avx2.Xor(dataVec.AsUInt32(), keyVec.AsUInt32());

                            // 64-bit multiply by the 32-bit prime, built from two 32x32->64 products:
                            // low32 * prime + ((high32 * prime) << 32).
                            Vector256<uint> dataKeyHi = Avx2.Shuffle(dataKey.AsUInt32(), 0b00110001);
                            Vector256<ulong> prodLo = Avx2.Multiply(dataKey, prime32);
                            Vector256<ulong> prodHi = Avx2.Multiply(dataKeyHi, prime32);

                            xAcc[i] = Avx2.Add(prodLo, Avx2.ShiftLeftLogical(prodHi, 32));
                        }
                    }
                }
            }
            else if (Sse2.IsSupported)
            {
                fixed (ulong* pAcc = acc)
                {
                    fixed (byte* pSecret = secret)
                    {
                        Vector128<uint> prime32 = Vector128.Create(Prime32_1);
                        Vector128<ulong>* xAcc = (Vector128<ulong>*)pAcc;
                        Vector128<byte>* xSecret = (Vector128<byte>*)pSecret;

                        // Same computation as the AVX2 path, two accumulators at a time.
                        for (ulong i = 0; i < StripeLen / 16; i++)
                        {
                            Vector128<ulong> accVec = xAcc[i];
                            Vector128<ulong> shifted = Sse2.ShiftRightLogical(accVec, 47);
                            Vector128<ulong> dataVec = Sse2.Xor(accVec, shifted);

                            Vector128<byte> keyVec = xSecret[i];
                            Vector128<uint> dataKey = Sse2.Xor(dataVec.AsUInt32(), keyVec.AsUInt32());

                            Vector128<uint> dataKeyHi = Sse2.Shuffle(dataKey.AsUInt32(), 0b00110001);
                            Vector128<ulong> prodLo = Sse2.Multiply(dataKey, prime32);
                            Vector128<ulong> prodHi = Sse2.Multiply(dataKeyHi, prime32);

                            xAcc[i] = Sse2.Add(prodLo, Sse2.ShiftLeftLogical(prodHi, 32));
                        }
                    }
                }
            }
            else
            {
                // Scalar fallback: one accumulator per iteration, with span bounds checks.
                for (int i = 0; i < AccNb; i++)
                {
                    ulong key64 = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(i * sizeof(ulong)));
                    ulong acc64 = acc[i];
                    acc64 = XorShift64(acc64, 47);
                    acc64 ^= key64;
                    acc64 *= Prime32_1;
                    acc[i] = acc64;
                }
            }
        }

        /// <summary>
        /// Accumulates <paramref name="nbStripes"/> consecutive stripes of <paramref name="input"/>.
        /// Stripe <c>n</c> starts at input offset <c>n * StripeLen</c> and is keyed by the secret
        /// window starting at <c>n * SecretConsumeRate</c>.
        /// </summary>
        /// <param name="acc">Accumulators, updated in place.</param>
        /// <param name="input">Data containing at least <paramref name="nbStripes"/> full stripes.</param>
        /// <param name="secret">Secret from which the per-stripe windows are sliced.</param>
        /// <param name="nbStripes">Number of stripes to process; nothing happens when it is not positive.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void Xxh3Accumulate(Span<ulong> acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, int nbStripes)
        {
            int stripe = 0;

            while (stripe < nbStripes)
            {
                ReadOnlySpan<byte> stripeData = input.Slice(stripe * StripeLen);
                ReadOnlySpan<byte> stripeSecret = secret.Slice(stripe * SecretConsumeRate);

                Xxh3Accumulate512(acc, stripeData, stripeSecret);
                stripe++;
            }
        }

        /// <summary>
        /// Main loop of the long-input path: consumes <paramref name="input"/> block by block,
        /// scrambling the accumulators after every full block, then handles the remaining
        /// stripes and finally the last <c>StripeLen</c> bytes of the input.
        /// </summary>
        /// <param name="acc">Accumulators, updated in place.</param>
        /// <param name="input">Data to hash; asserted to be longer than one stripe.</param>
        /// <param name="secret">Secret; asserted to be at least <c>SecretSizeMin</c> bytes.</param>
        private static void Xxh3HashLongInternalLoop(Span<ulong> acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret)
        {
            // A block is as many stripes as the secret can key before its window would run past the end.
            int stripesPerBlock = (secret.Length - StripeLen) / SecretConsumeRate;
            int bytesPerBlock = StripeLen * stripesPerBlock;
            int fullBlocks = (input.Length - 1) / bytesPerBlock;

            Debug.Assert(secret.Length >= SecretSizeMin);

            for (int block = 0; block < fullBlocks; block++)
            {
                ReadOnlySpan<byte> blockData = input.Slice(block * bytesPerBlock);

                Xxh3Accumulate(acc, blockData, secret, stripesPerBlock);
                Xxh3ScrambleAcc(acc, secret.Slice(secret.Length - StripeLen));
            }

            Debug.Assert(input.Length > StripeLen);

            // Whole stripes left in the trailing partial block (the very last byte is excluded
            // from the count, so the final stripe is always handled separately below).
            int tailStart = fullBlocks * bytesPerBlock;
            int tailStripes = (input.Length - 1 - (bytesPerBlock * fullBlocks)) / StripeLen;
            Debug.Assert(tailStripes <= (secret.Length / SecretConsumeRate));
            Xxh3Accumulate(acc, input.Slice(tailStart), secret, tailStripes);

            // The last stripe is always the final StripeLen bytes of the input, with its own secret window.
            ReadOnlySpan<byte> lastStripe = input.Slice(input.Length - StripeLen);
            ReadOnlySpan<byte> lastSecret = secret.Slice(secret.Length - StripeLen - SecretLastAccStart);
            Xxh3Accumulate512(acc, lastStripe, lastSecret);
        }

        /// <summary>
        /// Mixes the first two accumulators of <paramref name="acc"/> with the first 16 bytes of
        /// <paramref name="secret"/>: each accumulator is XORed with one little-endian 64-bit
        /// secret word, and the two results are multiplied and folded to 64 bits.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong Xxh3Mix2Accs(Span<ulong> acc, ReadOnlySpan<byte> secret)
        {
            ulong keyedFirst = acc[0] ^ BinaryPrimitives.ReadUInt64LittleEndian(secret);
            ulong keyedSecond = acc[1] ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(8));

            return Mul128Fold64(keyedFirst, keyedSecond);
        }

        /// <summary>
        /// Reduces the eight accumulators to a single 64-bit value: starting from
        /// <paramref name="start"/>, adds the mix of each accumulator pair (keyed by successive
        /// 16-byte chunks of <paramref name="secret"/>) and avalanches the sum.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ulong Xxh3MergeAccs(Span<ulong> acc, ReadOnlySpan<byte> secret, ulong start)
        {
            ulong total = start;
            int accOffset = 0;
            int secretOffset = 0;

            // Four pairs: accumulators (0,1), (2,3), (4,5), (6,7).
            while (accOffset < 8)
            {
                total += Xxh3Mix2Accs(acc.Slice(accOffset), secret.Slice(secretOffset));
                accOffset += 2;
                secretOffset += 16;
            }

            return Xxh3Avalanche(total);
        }

        /// <summary>
        /// Hashes an input through the long-input path: initializes the accumulators, runs the
        /// stripe loop, then merges the accumulators twice (with different secret offsets and
        /// start values) to produce the low and high halves of the result.
        /// </summary>
        /// <param name="input">Data to hash.</param>
        /// <param name="secret">Secret used for keying.</param>
        [SkipLocalsInit]
        private static Hash128 Xxh3HashLong128bInternal(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret)
        {
            // The stack buffer is not zeroed (SkipLocalsInit); it is fully overwritten right here.
            Span<ulong> acc = stackalloc ulong[AccNb];
            Xxh3InitAcc.AsSpan().CopyTo(acc);

            Xxh3HashLongInternalLoop(acc, input, secret);

            Debug.Assert(acc.Length == 8);
            Debug.Assert(secret.Length >= acc.Length * sizeof(ulong) + SecretMergeAccsStart);

            ulong length = (ulong)input.Length;

            ulong low = Xxh3MergeAccs(acc, secret.Slice(SecretMergeAccsStart), length * Prime64_1);

            // The high half reads its secret window mirrored from the end of the secret.
            int highSecretOffset = secret.Length - acc.Length * sizeof(ulong) - SecretMergeAccsStart;
            ulong high = Xxh3MergeAccs(acc, secret.Slice(highSecretOffset), ~(length * Prime64_2));

            return new Hash128
            {
                Low = low,
                High = high
            };
        }

        /// <summary>
        /// Hashes an input of 1 to 3 bytes. The first, middle and last bytes plus the length are
        /// packed into one 32-bit word, which is keyed with the first 16 bytes of
        /// <paramref name="secret"/> and <paramref name="seed"/> and then avalanched.
        /// </summary>
        private static Hash128 Xxh3Len1To3128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(input.Length >= 1 && input.Length <= 3);

            int length = input.Length;
            uint first = input[0];
            uint middle = input[length / 2];
            uint last = input[length - 1];

            // Layout of the packed word, from the top byte down: middle, first, length, last.
            uint packedLo = (middle << 24) | (first << 16) | ((uint)length << 8) | last;
            uint packedHi = BitOperations.RotateLeft(BinaryPrimitives.ReverseEndianness(packedLo), 13);

            uint secretLo = BinaryPrimitives.ReadUInt32LittleEndian(secret) ^ BinaryPrimitives.ReadUInt32LittleEndian(secret.Slice(4));
            uint secretHi = BinaryPrimitives.ReadUInt32LittleEndian(secret.Slice(8)) ^ BinaryPrimitives.ReadUInt32LittleEndian(secret.Slice(12));

            ulong flipLo = secretLo + seed;
            ulong flipHi = secretHi - seed;

            return new Hash128
            {
                Low = Xxh64Avalanche(packedLo ^ flipLo),
                High = Xxh64Avalanche(packedHi ^ flipHi)
            };
        }

        /// <summary>
        /// Hashes an input of 4 to 8 bytes. The first and last four bytes of the input (which
        /// overlap for lengths below 8) are combined into one 64-bit word, keyed with secret
        /// bytes 16..31 and <paramref name="seed"/>, multiplied into 128 bits and mixed.
        /// </summary>
        /// <param name="input">Data to hash; asserted to be 4 to 8 bytes long.</param>
        /// <param name="secret">Secret; bytes 16..31 are read.</param>
        /// <param name="seed">Seed value mixed into the key.</param>
        /// <returns>The 128-bit hash of <paramref name="input"/>.</returns>
        private static Hash128 Xxh3Len4To8128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(4 <= input.Length && input.Length <= 8);

            // XOR the byte-swapped low half of the seed into its upper 32 bits. The swapped value
            // must be widened to ulong before shifting: shifting a uint by 32 is a no-op in C#
            // because the shift count is masked to 5 bits for 32-bit operands.
            seed ^= (ulong)BinaryPrimitives.ReverseEndianness((uint)seed) << 32;

            uint inputLo = BinaryPrimitives.ReadUInt32LittleEndian(input);
            uint inputHi = BinaryPrimitives.ReadUInt32LittleEndian(input.Slice(input.Length - 4));
            ulong input64 = inputLo + ((ulong)inputHi << 32);
            ulong bitFlip = (BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(16)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(24))) + seed;
            ulong keyed = input64 ^ bitFlip;

            // The multiplier depends on the length, so inputs of different lengths diverge here.
            Hash128 m128 = Mult64To128(keyed, Prime64_1 + ((ulong)input.Length << 2));

            // Cross-mix the two halves of the product.
            m128.High += m128.Low << 1;
            m128.Low ^= m128.High >> 3;

            // Final mixing of each half.
            m128.Low = XorShift64(m128.Low, 35);
            m128.Low *= 0x9FB21C651E98DF25UL;
            m128.Low = XorShift64(m128.Low, 28);
            m128.High = Xxh3Avalanche(m128.High);
            return m128;
        }

        /// <summary>
        /// Hashes an input of 9 to 16 bytes. The first and last eight bytes of the input (which
        /// overlap for lengths below 16) are keyed with secret bytes 32..63 and
        /// <paramref name="seed"/>, multiplied into 128 bits and mixed.
        /// </summary>
        private static Hash128 Xxh3Len9To16128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(input.Length >= 9 && input.Length <= 16);

            ulong secretLo = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(32)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(40));
            ulong secretHi = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(48)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(56));
            ulong flipLo = secretLo - seed;
            ulong flipHi = secretHi + seed;

            ulong head = BinaryPrimitives.ReadUInt64LittleEndian(input);
            ulong tail = BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(input.Length - 8));

            Hash128 mixed = Mult64To128(head ^ tail ^ flipLo, Prime64_1);

            // Inject the length into the top bits of the low half.
            mixed.Low += ((ulong)input.Length - 1) << 54;

            tail ^= flipHi;
            mixed.High += tail + Mult32To64((uint)tail, Prime32_2 - 1);
            mixed.Low ^= BinaryPrimitives.ReverseEndianness(mixed.High);

            // Second multiplication round, followed by an avalanche of both halves.
            Hash128 result = Mult64To128(mixed.Low, Prime64_2);
            result.High += mixed.High * Prime64_2;
            result.Low = Xxh3Avalanche(result.Low);
            result.High = Xxh3Avalanche(result.High);
            return result;
        }

        /// <summary>
        /// Hashes an input of at most 16 bytes by dispatching on its length: 9..16, 4..8 and 1..3
        /// bytes each have a dedicated routine; the empty input is hashed from the seed and
        /// secret bytes 64..95 alone.
        /// </summary>
        private static Hash128 Xxh3Len0To16128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(input.Length <= 16);

            int length = input.Length;

            if (length > 8)
            {
                return Xxh3Len9To16128b(input, secret, seed);
            }

            if (length >= 4)
            {
                return Xxh3Len4To8128b(input, secret, seed);
            }

            if (length != 0)
            {
                return Xxh3Len1To3128b(input, secret, seed);
            }

            // Empty input: the result depends only on the seed and the secret.
            ulong flipLo = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(64)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(72));
            ulong flipHi = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(80)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(88));

            Hash128 emptyHash = new Hash128();
            emptyHash.Low = Xxh64Avalanche(seed ^ flipLo);
            emptyHash.High = Xxh64Avalanche(seed ^ flipHi);
            return emptyHash;
        }

        /// <summary>
        /// Mixes the first 16 bytes of <paramref name="input"/> with the first 16 bytes of
        /// <paramref name="secret"/>. The seed is added to the first secret word and subtracted
        /// from the second; the keyed input words are then multiplied and folded to 64 bits.
        /// </summary>
        private static ulong Xxh3Mix16b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            ulong dataLo = BinaryPrimitives.ReadUInt64LittleEndian(input);
            ulong dataHi = BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(8));

            ulong keyLo = BinaryPrimitives.ReadUInt64LittleEndian(secret) + seed;
            ulong keyHi = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(8)) - seed;

            return Mul128Fold64(dataLo ^ keyLo, dataHi ^ keyHi);
        }

        /// <summary>
        /// Mixes two 16-byte chunks into a running 128-bit accumulator. <paramref name="input"/>
        /// (keyed by secret bytes 0..15) feeds the low half and <paramref name="input2"/> (keyed
        /// by secret bytes 16..31) feeds the high half; each half is additionally XORed with the
        /// sum of the two raw words of the other chunk.
        /// </summary>
        /// <returns>The updated accumulator.</returns>
        private static Hash128 Xxh128Mix32b(Hash128 acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> input2, ReadOnlySpan<byte> secret, ulong seed)
        {
            ulong mixedFirst = Xxh3Mix16b(input, secret, seed);
            ulong rawSecond = BinaryPrimitives.ReadUInt64LittleEndian(input2) + BinaryPrimitives.ReadUInt64LittleEndian(input2.Slice(8));
            ulong mixedSecond = Xxh3Mix16b(input2, secret.Slice(16), seed);
            ulong rawFirst = BinaryPrimitives.ReadUInt64LittleEndian(input) + BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(8));

            acc.Low = (acc.Low + mixedFirst) ^ rawSecond;
            acc.High = (acc.High + mixedSecond) ^ rawFirst;
            return acc;
        }

        /// <summary>
        /// Hashes an input of 17 to 128 bytes. Pairs of 16-byte chunks taken symmetrically from
        /// the front and the back of the input are mixed from the innermost pair outwards; how
        /// many pairs are used depends on the length.
        /// </summary>
        private static Hash128 Xxh3Len17To128128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(secret.Length >= SecretSizeMin);
            Debug.Assert(16 < input.Length && input.Length <= 128);

            int length = input.Length;

            Hash128 acc = new Hash128
            {
                Low = (ulong)length * Prime64_1,
                High = 0
            };

            // Each threshold implies the ones below it, so these checks are equivalent to a nested chain.
            if (length > 96)
            {
                acc = Xxh128Mix32b(acc, input.Slice(48), input.Slice(length - 64), secret.Slice(96), seed);
            }

            if (length > 64)
            {
                acc = Xxh128Mix32b(acc, input.Slice(32), input.Slice(length - 48), secret.Slice(64), seed);
            }

            if (length > 32)
            {
                acc = Xxh128Mix32b(acc, input.Slice(16), input.Slice(length - 32), secret.Slice(32), seed);
            }

            // The outermost pair (first and last 16 bytes) is always mixed.
            acc = Xxh128Mix32b(acc, input, input.Slice(length - 16), secret, seed);

            ulong low = acc.Low + acc.High;
            ulong high = acc.Low * Prime64_1 + acc.High * Prime64_4 + ((ulong)length - seed) * Prime64_2;

            return new Hash128
            {
                Low = Xxh3Avalanche(low),
                High = 0UL - Xxh3Avalanche(high)
            };
        }

        /// <summary>
        /// Hashes an input of 129 to 240 bytes. The input is mixed in 32-byte rounds: the first
        /// four rounds use the start of the secret, the accumulator is avalanched, the remaining
        /// full rounds use the secret from <c>MidSizeStartOffset</c>, and finally the last 32
        /// bytes of the input are mixed with a negated seed.
        /// </summary>
        private static Hash128 Xxh3Len129To240128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(secret.Length >= SecretSizeMin);
            Debug.Assert(128 < input.Length && input.Length <= 240);

            int length = input.Length;
            int roundCount = length / 32;

            Hash128 acc = new Hash128
            {
                Low = (ulong)length * Prime64_1,
                High = 0
            };

            // First four rounds: input and secret advance together, 32 bytes per round.
            for (int offset = 0; offset < 4 * 32; offset += 32)
            {
                acc = Xxh128Mix32b(acc, input.Slice(offset), input.Slice(offset + 16), secret.Slice(offset), seed);
            }

            acc.Low = Xxh3Avalanche(acc.Low);
            acc.High = Xxh3Avalanche(acc.High);
            Debug.Assert(roundCount >= 4);

            // Remaining full rounds restart the secret at MidSizeStartOffset.
            for (int round = 4; round < roundCount; round++)
            {
                int dataOffset = 32 * round;
                int secretOffset = MidSizeStartOffset + 32 * (round - 4);

                acc = Xxh128Mix32b(acc, input.Slice(dataOffset), input.Slice(dataOffset + 16), secret.Slice(secretOffset), seed);
            }

            // Last 32 bytes of the input, with the chunk order reversed and the seed negated.
            acc = Xxh128Mix32b(
                acc,
                input.Slice(length - 16),
                input.Slice(length - 32),
                secret.Slice(SecretSizeMin - MidSizeLastOffset - 16),
                0UL - seed);

            ulong low = acc.Low + acc.High;
            ulong high = acc.Low * Prime64_1 + acc.High * Prime64_4 + ((ulong)length - seed) * Prime64_2;

            return new Hash128
            {
                Low = Xxh3Avalanche(low),
                High = 0UL - Xxh3Avalanche(high)
            };
        }

        /// <summary>
        /// Selects the hashing routine for <paramref name="input"/> based on its length:
        /// up to 16 bytes, 17..128 bytes, 129..240 bytes, or the long-input path for anything larger.
        /// </summary>
        /// <param name="input">Data to hash.</param>
        /// <param name="secret">Secret; asserted to be at least <c>SecretSizeMin</c> bytes.</param>
        /// <param name="seed">
        /// Seed forwarded to the short and mid-size routines. It is not forwarded to the
        /// long-input path, which takes only the input and the secret.
        /// </param>
        private static Hash128 Xxh3128bitsInternal(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
        {
            Debug.Assert(secret.Length >= SecretSizeMin);

            int length = input.Length;

            if (length <= 16)
            {
                return Xxh3Len0To16128b(input, secret, seed);
            }

            if (length <= 128)
            {
                return Xxh3Len17To128128b(input, secret, seed);
            }

            if (length <= 240)
            {
                return Xxh3Len129To240128b(input, secret, seed);
            }

            return Xxh3HashLong128bInternal(input, secret);
        }

        /// <summary>
        /// Computes the 128-bit hash of <paramref name="input"/> using the built-in default
        /// secret and a seed of zero.
        /// </summary>
        /// <param name="input">Data to hash; may be empty.</param>
        /// <returns>The 128-bit hash value.</returns>
        public static Hash128 ComputeHash(ReadOnlySpan<byte> input)
        {
            const ulong DefaultSeed = 0UL;
            ReadOnlySpan<byte> defaultSecret = Xxh3KSecret;

            return Xxh3128bitsInternal(input, defaultSecret, DefaultSeed);
        }
    }
}