mirror of
https://github.com/Thealexbarney/LibHac.git
synced 2024-11-14 10:49:41 +01:00
Implement CharacterEncoding
This commit is contained in:
parent
76e5a20e1d
commit
8406bea563
3 changed files with 1796 additions and 0 deletions
1111
src/LibHac/Util/CharacterEncoding.cs
Normal file
1111
src/LibHac/Util/CharacterEncoding.cs
Normal file
File diff suppressed because it is too large
Load diff
21
tests/LibHac.Tests/SpanEqualAsserts.cs
Normal file
21
tests/LibHac.Tests/SpanEqualAsserts.cs
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
using System;
|
||||||
|
using Xunit.Sdk;
|
||||||
|
|
||||||
|
namespace Xunit
|
||||||
|
{
|
||||||
|
public partial class Assert
{
    /// <summary>
    /// Checks two spans for element-wise equality using <c>SequenceEqual</c>.
    /// </summary>
    /// <typeparam name="T">The element type of both spans</typeparam>
    /// <param name="expected">The span holding the expected elements</param>
    /// <param name="actual">The span to compare against <paramref name="expected"/></param>
    /// <exception cref="EqualException">Thrown when the spans differ</exception>
    public static void Equal<T>(ReadOnlySpan<T> expected, ReadOnlySpan<T> actual) where T : unmanaged, IEquatable<T>
    {
        if (expected.SequenceEqual(actual))
        {
            return;
        }

        // Both spans are copied into arrays before being handed to the exception.
        throw new EqualException(expected.ToArray(), actual.ToArray());
    }
}
|
||||||
|
}
|
664
tests/LibHac.Tests/Util/CharacterEncodingTests.cs
Normal file
664
tests/LibHac.Tests/Util/CharacterEncodingTests.cs
Normal file
|
@ -0,0 +1,664 @@
|
||||||
|
// Licensed to the .NET Foundation under one or more agreements.
|
||||||
|
// The .NET Foundation licenses this file to you under the MIT license.
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using LibHac.Util;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
namespace LibHac.Tests.Util
|
||||||
|
{
|
||||||
|
public class CharacterEncodingTests
|
||||||
|
{
|
||||||
|
// Most of these tests are stolen from .NET's UTF-8 tests. Some of the comments in this file may
|
||||||
|
// mention code paths and functions being tested in the .NET runtime that don't apply here as a result.
|
||||||
|
|
||||||
|
// ReSharper disable InconsistentNaming UnusedMember.Local
|
||||||
|
private const string X_UTF8 = "58"; // U+0058 LATIN CAPITAL LETTER X, 1 byte
|
||||||
|
private const string X_UTF16 = "X";
|
||||||
|
|
||||||
|
private const string Y_UTF8 = "59"; // U+0059 LATIN CAPITAL LETTER Y, 1 byte
|
||||||
|
private const string Y_UTF16 = "Y";
|
||||||
|
|
||||||
|
private const string Z_UTF8 = "5A"; // U+005A LATIN CAPITAL LETTER Z, 1 byte
|
||||||
|
private const string Z_UTF16 = "Z";
|
||||||
|
|
||||||
|
private const string E_ACUTE_UTF8 = "C3A9"; // U+00E9 LATIN SMALL LETTER E WITH ACUTE, 2 bytes
|
||||||
|
private const string E_ACUTE_UTF16 = "\u00E9";
|
||||||
|
|
||||||
|
private const string EURO_SYMBOL_UTF8 = "E282AC"; // U+20AC EURO SIGN, 3 bytes
|
||||||
|
private const string EURO_SYMBOL_UTF16 = "\u20AC";
|
||||||
|
|
||||||
|
private const string REPLACEMENT_CHAR_UTF8 = "EFBFBD"; // U+FFFD REPLACEMENT CHAR, 3 bytes
|
||||||
|
private const string REPLACEMENT_CHAR_UTF16 = "\uFFFD";
|
||||||
|
|
||||||
|
private const string GRINNING_FACE_UTF8 = "F09F9880"; // U+1F600 GRINNING FACE, 4 bytes
|
||||||
|
private const string GRINNING_FACE_UTF16 = "\U0001F600";
|
||||||
|
|
||||||
|
private const string WOMAN_CARTWHEELING_MEDSKIN_UTF16 = "\U0001F938\U0001F3FD\u200D\u2640\uFE0F"; // U+1F938 U+1F3FD U+200D U+2640 U+FE0F WOMAN CARTWHEELING: MEDIUM SKIN TONE
|
||||||
|
|
||||||
|
// All valid scalars [ U+0000 .. U+D7FF ] and [ U+E000 .. U+10FFFF ].
|
||||||
|
private static readonly IEnumerable<Rune> s_allValidScalars = Enumerable.Range(0x0000, 0xD800).Concat(Enumerable.Range(0xE000, 0x110000 - 0xE000)).Select(value => new Rune(value));
|
||||||
|
|
||||||
|
private static readonly ReadOnlyMemory<uint> s_allScalarsAsUtf32;
|
||||||
|
private static readonly ReadOnlyMemory<char> s_allScalarsAsUtf16;
|
||||||
|
private static readonly ReadOnlyMemory<byte> s_allScalarsAsUtf8;
|
||||||
|
// ReSharper restore InconsistentNaming UnusedMember.Local
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the UTF-32, UTF-16 and UTF-8 encodings of every scalar in <c>s_allValidScalars</c>,
/// concatenated in enumeration order, using the reference encoders in this class.
/// </summary>
static CharacterEncodingTests()
{
    var utf32Units = new List<uint>();
    var utf16Units = new List<char>();
    var utf8Units = new List<byte>();

    // Scratch buffers sized for the longest possible encoding of one scalar.
    Span<byte> utf8Scratch = stackalloc byte[4];
    Span<char> utf16Scratch = stackalloc char[2];

    foreach (Rune scalar in s_allValidScalars)
    {
        int utf8Count = ToUtf8(scalar, utf8Scratch);
        int utf16Count = ToUtf16(scalar, utf16Scratch);

        utf32Units.Add((uint)scalar.Value);

        // Only the units actually produced for this scalar are appended.
        foreach (char unit in utf16Scratch.Slice(0, utf16Count))
        {
            utf16Units.Add(unit);
        }

        foreach (byte unit in utf8Scratch.Slice(0, utf8Count))
        {
            utf8Units.Add(unit);
        }
    }

    s_allScalarsAsUtf32 = utf32Units.ToArray().AsMemory();
    s_allScalarsAsUtf16 = utf16Units.ToArray().AsMemory();
    s_allScalarsAsUtf8 = utf8Units.ToArray().AsMemory();
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* COMMON UTILITIES FOR UNIT TESTS
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// <summary>
/// Decodes a string of hexadecimal digit pairs into the bytes they represent.
/// </summary>
/// <param name="inputHex">Zero or more pairs of hex digits, upper or lower case.</param>
/// <returns>One byte per pair of hex digits.</returns>
public static byte[] DecodeHex(ReadOnlySpan<char> inputHex)
{
    string hexText = inputHex.ToString();

    // Fail the test with a readable message before attempting the conversion.
    bool isWellFormed = Regex.IsMatch(hexText, "^([0-9a-fA-F]{2})*$");
    Assert.True(isWellFormed, "Input must be an even number of hex characters.");

    return Convert.FromHexString(hexText);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Encodes <paramref name="rune"/> as UTF-8 with the reference encoder and returns exactly the bytes produced.
/// </summary>
/// <param name="rune">The scalar value to encode.</param>
/// <returns>A new array containing the encoded bytes.</returns>
public static byte[] ToUtf8(Rune rune)
{
    // The span-based overload asserts a 4-byte buffer.
    Span<byte> scratch = stackalloc byte[4];
    int written = ToUtf8(rune, scratch);

    return scratch[..written].ToArray();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Encodes <paramref name="rune"/> as UTF-16 with the reference encoder and returns exactly the chars produced.
/// </summary>
/// <param name="rune">The scalar value to encode.</param>
/// <returns>A new array containing the encoded chars.</returns>
private static char[] ToUtf16(Rune rune)
{
    // The span-based overload asserts a 2-char buffer.
    Span<char> scratch = stackalloc char[2];
    int written = ToUtf16(rune, scratch);

    return scratch[..written].ToArray();
}
|
||||||
|
|
||||||
|
// !! IMPORTANT !!
|
||||||
|
// Don't delete this implementation, as we use it as a reference to make sure the framework's
|
||||||
|
// transcoding logic is correct.
|
||||||
|
/// <summary>
/// Reference UTF-8 encoder: writes the encoding of <paramref name="rune"/> into a 4-byte buffer.
/// </summary>
/// <param name="rune">The scalar value to encode.</param>
/// <param name="destination">
/// Output buffer, asserted to be exactly 4 bytes long. Bytes past the encoded sequence are left as zero.
/// </param>
/// <returns>The number of bytes that make up the encoded sequence (1 to 4).</returns>
public static int ToUtf8(Rune rune, Span<byte> destination)
{
    int scalar = rune.Value;

    // The check is repeated inside the branch so the message is only formatted on failure.
    if (!Rune.IsValid(scalar))
    {
        Assert.True(Rune.IsValid(scalar), $"Rune with value U+{(uint)scalar:X4} is not well-formed.");
    }

    Assert.True(destination.Length == 4);

    // Zero the whole buffer so the unused tail is always 0.
    destination.Clear();

    if (scalar < 0x80)
    {
        // 1 byte: 0xxxxxxx
        destination[0] = (byte)scalar;
        return 1;
    }

    if (scalar < 0x0800)
    {
        // 2 bytes: 110xxxxx 10xxxxxx
        destination[0] = (byte)((scalar >> 6) | 0xC0);
        destination[1] = (byte)((scalar & 0x3F) | 0x80);
        return 2;
    }

    if (scalar < 0x10000)
    {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        destination[0] = (byte)((scalar >> 12) | 0xE0);
        destination[1] = (byte)(((scalar >> 6) & 0x3F) | 0x80);
        destination[2] = (byte)((scalar & 0x3F) | 0x80);
        return 3;
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    destination[0] = (byte)((scalar >> 18) | 0xF0);
    destination[1] = (byte)(((scalar >> 12) & 0x3F) | 0x80);
    destination[2] = (byte)(((scalar >> 6) & 0x3F) | 0x80);
    destination[3] = (byte)((scalar & 0x3F) | 0x80);
    return 4;
}
|
||||||
|
|
||||||
|
// !! IMPORTANT !!
|
||||||
|
// Don't delete this implementation, as we use it as a reference to make sure the framework's
|
||||||
|
// transcoding logic is correct.
|
||||||
|
/// <summary>
/// Reference UTF-16 encoder: writes the encoding of <paramref name="rune"/> into a 2-char buffer.
/// </summary>
/// <param name="rune">The scalar value to encode.</param>
/// <param name="destination">
/// Output buffer, asserted to be exactly 2 chars long. A char past the encoded sequence is left as '\0'.
/// </param>
/// <returns>The number of chars that make up the encoded sequence (1 or 2).</returns>
private static int ToUtf16(Rune rune, Span<char> destination)
{
    int scalar = rune.Value;

    // The check is repeated inside the branch so the message is only formatted on failure.
    if (!Rune.IsValid(scalar))
    {
        Assert.True(Rune.IsValid(scalar), $"Rune with value U+{(uint)scalar:X4} is not well-formed.");
    }

    Assert.True(destination.Length == 2);

    // Zero the whole buffer so the unused tail is always '\0'.
    destination.Clear();

    if (rune.IsBmp)
    {
        destination[0] = (char)scalar;
        return 1;
    }

    // Surrogate pair. Subtracting 0x40 from the high half removes the 0x10000 offset (0x10000 >> 10).
    destination[0] = (char)((scalar >> 10) + 0xD800 - 0x40);
    destination[1] = (char)((scalar & 0x03FF) + 0xDC00);
    return 2;
}
|
||||||
|
|
||||||
|
[Theory]
[InlineData("", "")] // empty string is OK
[InlineData(X_UTF16, X_UTF8)]
[InlineData(E_ACUTE_UTF16, E_ACUTE_UTF8)]
[InlineData(EURO_SYMBOL_UTF16, EURO_SYMBOL_UTF8)]
public void Utf16ToUtf8_WithSmallValidBuffers(string utf16Input, string expectedUtf8TranscodingHex)
{
    // These cases cover inputs of at most one UTF-16 code unit.
    Assert.InRange(utf16Input.Length, 0, 1);

    // Two hex digits per byte: the destination is exactly as large as the expected output.
    int exactSize = expectedUtf8TranscodingHex.Length / 2;
    byte[] expectedUtf8 = DecodeHex(expectedUtf8TranscodingHex);

    Utf16ToUtf8_String_Test_Core(utf16Input, exactSize, CharacterEncodingResult.Success, expectedUtf8);
}
|
||||||
|
|
||||||
|
[Theory]
[InlineData('\uD800', CharacterEncodingResult.InsufficientLength)] // standalone high surrogate
[InlineData('\uDFFF', CharacterEncodingResult.InvalidFormat)] // standalone low surrogate
public void Utf16ToUtf8_WithOnlyStandaloneSurrogates(char charValue, CharacterEncodingResult expectedEncodingResult)
{
    // The input is a single unpaired surrogate; the destination is empty and no output is expected.
    char[] loneSurrogate = { charValue };

    Utf16ToUtf8_String_Test_Core(loneSurrogate, 0, expectedEncodingResult, ReadOnlySpan<byte>.Empty);
}
|
||||||
|
|
||||||
|
[Theory]
[InlineData("<LOW><HIGH>", "")] // swapped surrogate pair characters
[InlineData("A<LOW><HIGH>", "41")] // consume standalone ASCII char, then swapped surrogate pair characters
[InlineData("A<HIGH>B", "41F0")] // consume standalone ASCII char, then standalone high surrogate char
[InlineData("A<LOW>B", "41")] // consume standalone ASCII char, then standalone low surrogate char
[InlineData("AB<HIGH><HIGH>", "4142F0")] // consume two ASCII chars, then standalone high surrogate char
[InlineData("AB<LOW><LOW>", "4142")] // consume two ASCII chars, then standalone low surrogate char
public void Utf16ToUtf8_WithInvalidSurrogates(string utf16Input, string expectedUtf8TranscodingHex)
{
    // xUnit can't handle ill-formed strings in [InlineData], so the placeholders are substituted here.
    string illFormedInput = utf16Input.Replace("<HIGH>", "\uD800").Replace("<LOW>", "\uDFFF");

    // Every case here is expected to supply at least 2 chars of input.
    Assert.True(illFormedInput.Length >= 2);

    byte[] expectedUtf8 = DecodeHex(expectedUtf8TranscodingHex);
    int exactSize = expectedUtf8TranscodingHex.Length / 2;

    // First pass: the destination is exactly as large as the expected partial output.
    Utf16ToUtf8_String_Test_Core(illFormedInput, exactSize, CharacterEncodingResult.InvalidFormat, expectedUtf8);

    // Second pass: 16 spare bytes, so running out of destination space cannot be the reason for failing.
    Utf16ToUtf8_String_Test_Core(illFormedInput, exactSize + 16, CharacterEncodingResult.InvalidFormat, expectedUtf8);
}
|
||||||
|
|
||||||
|
[Theory]
[InlineData("80", CharacterEncodingResult.InsufficientLength, "")] // sequence cannot begin with continuation character
[InlineData("8182", CharacterEncodingResult.InsufficientLength, "")] // sequence cannot begin with continuation character
[InlineData("838485", CharacterEncodingResult.InsufficientLength, "")] // sequence cannot begin with continuation character
[InlineData(X_UTF8 + "80", CharacterEncodingResult.InsufficientLength, X_UTF16)] // sequence cannot begin with continuation character
[InlineData(X_UTF8 + "8182", CharacterEncodingResult.InsufficientLength, X_UTF16)] // sequence cannot begin with continuation character
[InlineData("C0", CharacterEncodingResult.InvalidFormat, "")] // [ C0 ] is always invalid
[InlineData("C080", CharacterEncodingResult.InsufficientLength, "")] // [ C0 ] is always invalid
[InlineData("C08081", CharacterEncodingResult.InsufficientLength, "")] // [ C0 ] is always invalid
[InlineData(X_UTF8 + "C1", CharacterEncodingResult.InvalidFormat, X_UTF16)] // [ C1 ] is always invalid
[InlineData(X_UTF8 + "C180", CharacterEncodingResult.InsufficientLength, X_UTF16)] // [ C1 ] is always invalid
[InlineData(X_UTF8 + "C27F", CharacterEncodingResult.InsufficientLength, X_UTF16)] // [ C2 ] is improperly terminated
[InlineData("E2827F", CharacterEncodingResult.InsufficientLength, "")] // [ E2 82 ] is improperly terminated
[InlineData("E09F80", CharacterEncodingResult.InsufficientLength, "")] // [ E0 9F ... ] is overlong
[InlineData("E0C080", CharacterEncodingResult.InsufficientLength, "")] // [ E0 ] is improperly terminated
[InlineData("ED7F80", CharacterEncodingResult.InsufficientLength, "")] // [ ED ] is improperly terminated
[InlineData("EDA080", CharacterEncodingResult.InsufficientLength, "")] // [ ED A0 ... ] is surrogate
public void Utf8ToUtf16_WithSmallInvalidBuffers(string utf8HexInput, CharacterEncodingResult expectedEncodingResult, string expectedUtf16Transcoding)
{
    byte[] utf8Input = DecodeHex(utf8HexInput);
    int exactSize = expectedUtf16Transcoding.Length;

    // First pass: the destination only has room for the chars expected before the bad sequence.
    Utf8ToUtf16_String_Test_Core(utf8Input, exactSize, expectedEncodingResult, expectedUtf16Transcoding);

    // Second pass: 16 spare chars, so the destination length check is not hit
    // and the sequence must be reported as InvalidFormat.
    Utf8ToUtf16_String_Test_Core(utf8Input, exactSize + 16, CharacterEncodingResult.InvalidFormat, expectedUtf16Transcoding);
}
|
||||||
|
|
||||||
|
[Theory]
/* SMALL VALID BUFFERS - tests drain loop at end of method */
[InlineData("")] // empty string is OK
[InlineData("X")]
[InlineData("XY")]
[InlineData("XYZ")]
[InlineData(E_ACUTE_UTF16)]
[InlineData(X_UTF16 + E_ACUTE_UTF16)]
[InlineData(E_ACUTE_UTF16 + X_UTF16)]
[InlineData(EURO_SYMBOL_UTF16)]
/* LARGE VALID BUFFERS - test main loop at beginning of method */
[InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?")] // Loop unrolling at end of buffer
[InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?" + "01234567" + E_ACUTE_UTF16 + "89:;<=>?")] // Loop unrolling interrupted by non-ASCII
[InlineData("ABC" + E_ACUTE_UTF16 + "0123")] // 3 ASCII bytes followed by non-ASCII
[InlineData("AB" + E_ACUTE_UTF16 + "0123")] // 2 ASCII bytes followed by non-ASCII
[InlineData("A" + E_ACUTE_UTF16 + "0123")] // 1 ASCII byte followed by non-ASCII
[InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 4x 2-byte sequences, exercises optimization code path in 2-byte sequence processing
[InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + "PQ")] // 3x 2-byte sequences + 2 ASCII bytes, exercises optimization code path in 2-byte sequence processing
[InlineData(E_ACUTE_UTF16 + "PQ")] // single 2-byte sequence + 2 trailing ASCII bytes, exercises draining logic in 2-byte sequence processing
[InlineData(E_ACUTE_UTF16 + "P" + E_ACUTE_UTF16 + "0@P")] // single 2-byte sequences + 1 trailing ASCII byte + 2-byte sequence, exercises draining logic in 2-byte sequence processing
[InlineData(EURO_SYMBOL_UTF16 + "@")] // single 3-byte sequence + 1 trailing ASCII byte, exercises draining logic in 3-byte sequence processing
[InlineData(EURO_SYMBOL_UTF16 + "@P`")] // single 3-byte sequence + 3 trailing ASCII byte, exercises draining logic and "running out of data" logic in 3-byte sequence processing
[InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 3x 3-byte sequences, exercises "stay within 3-byte loop" logic in 3-byte sequence processing
[InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 4x 3-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
[InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3x 3-byte sequences + single 2-byte sequence, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
[InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 2x 3-byte sequences + 4x 2-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
[InlineData(GRINNING_FACE_UTF16 + GRINNING_FACE_UTF16)] // 2x 4-byte sequences, exercises 4-byte sequence processing
[InlineData(GRINNING_FACE_UTF16 + "@AB")] // single 4-byte sequence + 3 ASCII bytes, exercises 4-byte sequence processing and draining logic
[InlineData(WOMAN_CARTWHEELING_MEDSKIN_UTF16)] // exercises switching between multiple sequence lengths
public void Utf8ToUtf16_ValidBuffers(string utf16Input)
{
    // The conversion is run against destination buffers of increasing size, from empty up to exactly
    // large enough for the full output. An undersized destination must be reported as
    // InsufficientLength, with the chars that did fit already written.

    Rune[] scalars = utf16Input.EnumerateRunes().ToArray();

    // The UTF-8 input is produced by the reference encoder in this class.
    byte[] utf8Input = scalars.SelectMany(scalar => ToUtf8(scalar)).ToArray();

    // Empty destination: only an empty input can succeed.
    CharacterEncodingResult emptyDestinationResult = utf8Input.Length == 0
        ? CharacterEncodingResult.Success
        : CharacterEncodingResult.InsufficientLength;

    Utf8ToUtf16_String_Test_Core(utf8Input, 0, emptyDestinationResult, ReadOnlySpan<char>.Empty);

    // UTF-16 encoding of the scalars processed so far; grows by one scalar per iteration.
    char[] expectedPrefix = Array.Empty<char>();
    int lastIndex = scalars.Length - 1;

    for (int i = 0; i <= lastIndex; i++)
    {
        Rune current = scalars[i];

        if (!current.IsBmp)
        {
            // An astral scalar needs two chars; with room for only one more char, nothing beyond
            // the current prefix may be written.
            Utf8ToUtf16_String_Test_Core(
                utf8Input,
                expectedPrefix.Length + 1,
                CharacterEncodingResult.InsufficientLength,
                expectedPrefix);
        }

        // Grow the destination just enough to hold the next full scalar.
        expectedPrefix = expectedPrefix.Concat(ToUtf16(current)).ToArray();

        CharacterEncodingResult prefixResult = i == lastIndex
            ? CharacterEncodingResult.Success
            : CharacterEncodingResult.InsufficientLength;

        Utf8ToUtf16_String_Test_Core(utf8Input, expectedPrefix.Length, prefixResult, expectedPrefix);
    }

    // Repeat with 64 ASCII chars in front of the input and an exactly-sized destination.
    utf16Input = new string('x', 64) + utf16Input;
    utf8Input = utf16Input.EnumerateRunes().SelectMany(scalar => ToUtf8(scalar)).ToArray();

    Utf8ToUtf16_String_Test_Core(utf8Input, utf16Input.Length, CharacterEncodingResult.Success, utf16Input);

    // Repeat once more with the ASCII prefix swapped for non-ASCII data.
    utf16Input = WOMAN_CARTWHEELING_MEDSKIN_UTF16 + utf16Input[64..];
    utf8Input = utf16Input.EnumerateRunes().SelectMany(scalar => ToUtf8(scalar)).ToArray();

    Utf8ToUtf16_String_Test_Core(utf8Input, utf16Input.Length, CharacterEncodingResult.Success, utf16Input);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf8ToUtf16_String_AllPossibleScalarValues()
{
    // Converts the UTF-8 encoding of every valid scalar in one call, into an exactly-sized destination.
    ReadOnlySpan<byte> source = s_allScalarsAsUtf8.Span;
    ReadOnlySpan<char> expected = s_allScalarsAsUtf16.Span;

    Utf8ToUtf16_String_Test_Core(source, expected.Length, CharacterEncodingResult.Success, expected);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf16ToUtf8_String_AllPossibleScalarValues()
{
    // Converts the UTF-16 encoding of every valid scalar in one call, into an exactly-sized destination.
    ReadOnlySpan<char> source = s_allScalarsAsUtf16.Span;
    ReadOnlySpan<byte> expected = s_allScalarsAsUtf8.Span;

    Utf16ToUtf8_String_Test_Core(source, expected.Length, CharacterEncodingResult.Success, expected);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf8ToUtf32_String_AllPossibleScalarValues()
{
    // Converts the UTF-8 encoding of every valid scalar in one call, into an exactly-sized destination.
    ReadOnlySpan<byte> source = s_allScalarsAsUtf8.Span;
    ReadOnlySpan<uint> expected = s_allScalarsAsUtf32.Span;

    Utf8ToUtf32_String_Test_Core(source, expected.Length, CharacterEncodingResult.Success, expected);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf32ToUtf8_String_AllPossibleScalarValues()
{
    // Converts every valid scalar from UTF-32 in one call, into an exactly-sized destination.
    ReadOnlySpan<uint> source = s_allScalarsAsUtf32.Span;
    ReadOnlySpan<byte> expected = s_allScalarsAsUtf8.Span;

    Utf32ToUtf8_String_Test_Core(source, expected.Length, CharacterEncodingResult.Success, expected);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf8ToUtf16_Length_AllPossibleScalarValues()
{
    // Skip the first code point because it's 0. U+0000 is one byte in UTF-8 and one char in
    // UTF-16, so both the input and the expected length shrink by 1.
    ReadOnlySpan<byte> source = s_allScalarsAsUtf8.Span.Slice(1);
    int expectedLength = s_allScalarsAsUtf16.Length - 1;

    Utf8ToUtf16_Length_Test_Core(source, CharacterEncodingResult.Success, expectedLength);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf16ToUtf8_Length_AllPossibleScalarValues()
{
    // The leading U+0000 is left out of the input (presumably because the length routine treats
    // 0 as a terminator - confirm); it is one unit in both encodings, hence the "- 1".
    ReadOnlySpan<char> source = s_allScalarsAsUtf16.Span.Slice(1);
    int expectedLength = s_allScalarsAsUtf8.Length - 1;

    Utf16ToUtf8_Length_Test_Core(source, CharacterEncodingResult.Success, expectedLength);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf8ToUtf32_Length_AllPossibleScalarValues()
{
    // The leading U+0000 is left out of the input (presumably because the length routine treats
    // 0 as a terminator - confirm); it is one unit in both encodings, hence the "- 1".
    ReadOnlySpan<byte> source = s_allScalarsAsUtf8.Span.Slice(1);
    int expectedLength = s_allScalarsAsUtf32.Length - 1;

    Utf8ToUtf32_Length_Test_Core(source, CharacterEncodingResult.Success, expectedLength);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf32ToUtf8_Length_AllPossibleScalarValues()
{
    // The leading U+0000 is left out of the input (presumably because the length routine treats
    // 0 as a terminator - confirm); it is one unit in both encodings, hence the "- 1".
    ReadOnlySpan<uint> source = s_allScalarsAsUtf32.Span.Slice(1);
    int expectedLength = s_allScalarsAsUtf8.Length - 1;

    Utf32ToUtf8_Length_Test_Core(source, CharacterEncodingResult.Success, expectedLength);
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf8ToUtf16_Character_AllPossibleScalarValues()
{
    // Both buffers are refilled (and zero-padded) by the reference encoders for each scalar.
    Span<byte> utf8Reference = stackalloc byte[4];
    Span<char> utf16Reference = stackalloc char[2];

    foreach (Rune scalar in s_allValidScalars)
    {
        ToUtf8(scalar, utf8Reference);
        ToUtf16(scalar, utf16Reference);

        Utf8ToUtf16_Character_Test_Core(utf8Reference, CharacterEncodingResult.Success, utf16Reference);
    }
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf16ToUtf8_Character_AllPossibleScalarValues()
{
    // Both buffers are refilled (and zero-padded) by the reference encoders for each scalar.
    Span<byte> utf8Reference = stackalloc byte[4];
    Span<char> utf16Reference = stackalloc char[2];

    foreach (Rune scalar in s_allValidScalars)
    {
        ToUtf8(scalar, utf8Reference);
        ToUtf16(scalar, utf16Reference);

        Utf16ToUtf8_Character_Test_Core(utf16Reference, CharacterEncodingResult.Success, utf8Reference);
    }
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf8ToUtf32_Character_AllPossibleScalarValues()
{
    // The buffer is refilled (and zero-padded) by the reference encoder for each scalar.
    Span<byte> utf8Reference = stackalloc byte[4];

    foreach (Rune scalar in s_allValidScalars)
    {
        ToUtf8(scalar, utf8Reference);

        uint expectedUtf32 = (uint)scalar.Value;
        Utf8ToUtf32_Character_Test_Core(utf8Reference, CharacterEncodingResult.Success, expectedUtf32);
    }
}
|
||||||
|
|
||||||
|
[Fact]
public void Utf32ToUtf8_Character_AllPossibleScalarValues()
{
    // The buffer is refilled (and zero-padded) by the reference encoder for each scalar.
    Span<byte> utf8Reference = stackalloc byte[4];

    foreach (Rune scalar in s_allValidScalars)
    {
        ToUtf8(scalar, utf8Reference);

        uint utf32Input = (uint)scalar.Value;
        Utf32ToUtf8_Character_Test_Core(utf32Input, CharacterEncodingResult.Success, utf8Reference);
    }
}
|
||||||
|
|
||||||
|
[Fact]
public void PickOutCharacterFromUtf8String_AllPossibleScalarValues()
{
    byte[] expectedCharacter = new byte[4];
    byte[] pickedCharacter = new byte[4];

    // U+0000 is skipped both in the byte stream and in the scalar enumeration below.
    // NOTE(review): the span is passed by ref; presumably it is advanced past each picked character - confirm.
    ReadOnlySpan<byte> remainingUtf8 = s_allScalarsAsUtf8.Span[1..];

    foreach (Rune scalar in s_allValidScalars.Skip(1))
    {
        // Reference encoding of the scalar, zero-padded to 4 bytes.
        ToUtf8(scalar, expectedCharacter);

        CharacterEncodingResult pickResult = CharacterEncoding.PickOutCharacterFromUtf8String(pickedCharacter, ref remainingUtf8);

        Assert.Equal(CharacterEncodingResult.Success, pickResult);
        Assert.Equal((ReadOnlySpan<byte>)expectedCharacter, pickedCharacter);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs <c>CharacterEncoding.ConvertStringUtf8ToUtf16Native</c> into a fresh buffer of
/// <paramref name="destinationSize"/> chars, then checks the result code and the leading chars of the buffer.
/// </summary>
/// <param name="utf8Input">The UTF-8 bytes to convert; the full length is passed as the source length.</param>
/// <param name="destinationSize">Number of chars to allocate for the destination.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf16Transcoding">The chars that must appear at the start of the destination.</param>
private static void Utf8ToUtf16_String_Test_Core(ReadOnlySpan<byte> utf8Input, int destinationSize, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<char> expectedUtf16Transcoding)
{
    var output = new char[destinationSize];

    CharacterEncodingResult result = CharacterEncoding.ConvertStringUtf8ToUtf16Native(output, utf8Input, utf8Input.Length);

    Assert.Equal(expectedEncodingResult, result);

    // Only the expected prefix is compared; anything after it in the buffer is not inspected.
    Assert.Equal(expectedUtf16Transcoding, output.AsSpan(0, expectedUtf16Transcoding.Length));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs <c>CharacterEncoding.ConvertStringUtf16NativeToUtf8</c> into a fresh buffer of
/// <paramref name="destinationSize"/> bytes, then checks the result code and the leading bytes of the buffer.
/// </summary>
/// <param name="utf16Input">The UTF-16 chars to convert; the full length is passed as the source length.</param>
/// <param name="destinationSize">Number of bytes to allocate for the destination.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf8Transcoding">The bytes that must appear at the start of the destination.</param>
private static void Utf16ToUtf8_String_Test_Core(ReadOnlySpan<char> utf16Input, int destinationSize, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<byte> expectedUtf8Transcoding)
{
    var output = new byte[destinationSize];

    CharacterEncodingResult result = CharacterEncoding.ConvertStringUtf16NativeToUtf8(output, utf16Input, utf16Input.Length);

    Assert.Equal(expectedEncodingResult, result);

    // Only the expected prefix is compared; anything after it in the buffer is not inspected.
    Assert.Equal(expectedUtf8Transcoding, output.AsSpan(0, expectedUtf8Transcoding.Length));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs <c>CharacterEncoding.ConvertStringUtf8ToUtf32</c> into a fresh buffer of
/// <paramref name="destinationSize"/> code points, then checks the result code and the leading values of the buffer.
/// </summary>
/// <param name="utf8Input">The UTF-8 bytes to convert; the full length is passed as the source length.</param>
/// <param name="destinationSize">Number of UTF-32 units to allocate for the destination.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf32Transcoding">The values that must appear at the start of the destination.</param>
private static void Utf8ToUtf32_String_Test_Core(ReadOnlySpan<byte> utf8Input, int destinationSize, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<uint> expectedUtf32Transcoding)
{
    var output = new uint[destinationSize];

    CharacterEncodingResult result = CharacterEncoding.ConvertStringUtf8ToUtf32(output, utf8Input, utf8Input.Length);

    Assert.Equal(expectedEncodingResult, result);

    // Only the expected prefix is compared; anything after it in the buffer is not inspected.
    Assert.Equal(expectedUtf32Transcoding, output.AsSpan(0, expectedUtf32Transcoding.Length));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs <c>CharacterEncoding.ConvertStringUtf32ToUtf8</c> into a fresh buffer of
/// <paramref name="destinationSize"/> bytes, then checks the result code and the leading bytes of the buffer.
/// </summary>
/// <param name="utf32Input">The UTF-32 values to convert; the full length is passed as the source length.</param>
/// <param name="destinationSize">Number of bytes to allocate for the destination.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf8Transcoding">The bytes that must appear at the start of the destination.</param>
private static void Utf32ToUtf8_String_Test_Core(ReadOnlySpan<uint> utf32Input, int destinationSize, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<byte> expectedUtf8Transcoding)
{
    var output = new byte[destinationSize];

    CharacterEncodingResult result = CharacterEncoding.ConvertStringUtf32ToUtf8(output, utf32Input, utf32Input.Length);

    Assert.Equal(expectedEncodingResult, result);

    // Only the expected prefix is compared; anything after it in the buffer is not inspected.
    Assert.Equal(expectedUtf8Transcoding, output.AsSpan(0, expectedUtf8Transcoding.Length));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Calls <c>CharacterEncoding.GetLengthOfConvertedStringUtf8ToUtf16Native</c> and checks both the
/// result code and the reported length.
/// </summary>
/// <param name="utf8Input">The UTF-8 bytes whose converted length is requested.</param>
/// <param name="expectedEncodingResult">The result code the call must return.</param>
/// <param name="expectedUtf16Length">The length the call must report.</param>
private static void Utf8ToUtf16_Length_Test_Core(ReadOnlySpan<byte> utf8Input, CharacterEncodingResult expectedEncodingResult, int expectedUtf16Length)
{
    CharacterEncodingResult result = CharacterEncoding.GetLengthOfConvertedStringUtf8ToUtf16Native(out int reportedLength, utf8Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf16Length, reportedLength);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Calls <c>CharacterEncoding.GetLengthOfConvertedStringUtf16NativeToUtf8</c> and checks both the
/// result code and the reported length.
/// </summary>
/// <param name="utf16Input">The UTF-16 chars whose converted length is requested.</param>
/// <param name="expectedEncodingResult">The result code the call must return.</param>
/// <param name="expectedUtf8Length">The length the call must report.</param>
private static void Utf16ToUtf8_Length_Test_Core(ReadOnlySpan<char> utf16Input, CharacterEncodingResult expectedEncodingResult, int expectedUtf8Length)
{
    CharacterEncodingResult result = CharacterEncoding.GetLengthOfConvertedStringUtf16NativeToUtf8(out int reportedLength, utf16Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf8Length, reportedLength);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Calls <c>CharacterEncoding.GetLengthOfConvertedStringUtf8ToUtf32</c> and checks both the
/// result code and the reported length.
/// </summary>
/// <param name="utf8Input">The UTF-8 bytes whose converted length is requested.</param>
/// <param name="expectedEncodingResult">The result code the call must return.</param>
/// <param name="expectedUtf32Length">The length the call must report.</param>
private static void Utf8ToUtf32_Length_Test_Core(ReadOnlySpan<byte> utf8Input, CharacterEncodingResult expectedEncodingResult, int expectedUtf32Length)
{
    CharacterEncodingResult result = CharacterEncoding.GetLengthOfConvertedStringUtf8ToUtf32(out int reportedLength, utf8Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf32Length, reportedLength);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Calls <c>CharacterEncoding.GetLengthOfConvertedStringUtf32ToUtf8</c> and checks both the
/// result code and the reported length.
/// </summary>
/// <param name="utf32Input">The UTF-32 values whose converted length is requested.</param>
/// <param name="expectedEncodingResult">The result code the call must return.</param>
/// <param name="expectedUtf8Length">The length the call must report.</param>
private static void Utf32ToUtf8_Length_Test_Core(ReadOnlySpan<uint> utf32Input, CharacterEncodingResult expectedEncodingResult, int expectedUtf8Length)
{
    CharacterEncodingResult result = CharacterEncoding.GetLengthOfConvertedStringUtf32ToUtf8(out int reportedLength, utf32Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf8Length, reportedLength);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts a single character with <c>CharacterEncoding.ConvertCharacterUtf8ToUtf16Native</c> into a
/// 2-char buffer, then checks the result code, the converted chars, and that the rest of the buffer is zero.
/// </summary>
/// <param name="utf8Input">The UTF-8 bytes of the character to convert.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf16Transcoding">The chars that must appear at the start of the destination.</param>
private static void Utf8ToUtf16_Character_Test_Core(ReadOnlySpan<byte> utf8Input, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<char> expectedUtf16Transcoding)
{
    Span<char> output = stackalloc char[2];

    CharacterEncodingResult result = CharacterEncoding.ConvertCharacterUtf8ToUtf16Native(output, utf8Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf16Transcoding, output.Slice(0, expectedUtf16Transcoding.Length));

    // Every char after the expected ones must still be zero.
    foreach (char trailing in output.Slice(expectedUtf16Transcoding.Length))
    {
        Assert.Equal(0, trailing);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts a single character with <c>CharacterEncoding.ConvertCharacterUtf16NativeToUtf8</c> into a
/// 4-byte buffer, then checks the result code, the converted bytes, and that the rest of the buffer is zero.
/// </summary>
/// <param name="utf16Input">The UTF-16 chars of the character to convert.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf8Transcoding">The bytes that must appear at the start of the destination.</param>
private static void Utf16ToUtf8_Character_Test_Core(ReadOnlySpan<char> utf16Input, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<byte> expectedUtf8Transcoding)
{
    Span<byte> output = stackalloc byte[4];

    CharacterEncodingResult result = CharacterEncoding.ConvertCharacterUtf16NativeToUtf8(output, utf16Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf8Transcoding, output.Slice(0, expectedUtf8Transcoding.Length));

    // Every byte after the expected ones must still be zero.
    foreach (byte trailing in output.Slice(expectedUtf8Transcoding.Length))
    {
        Assert.Equal(0, trailing);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts a single character with <c>CharacterEncoding.ConvertCharacterUtf8ToUtf32</c> and checks
/// the result code and the resulting code point.
/// </summary>
/// <param name="utf8Input">The UTF-8 bytes of the character to convert.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf32Transcoding">The code point the conversion must produce.</param>
private static void Utf8ToUtf32_Character_Test_Core(ReadOnlySpan<byte> utf8Input, CharacterEncodingResult expectedEncodingResult, uint expectedUtf32Transcoding)
{
    CharacterEncodingResult result = CharacterEncoding.ConvertCharacterUtf8ToUtf32(out uint codePoint, utf8Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf32Transcoding, codePoint);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts a single code point with <c>CharacterEncoding.ConvertCharacterUtf32ToUtf8</c> into a
/// 4-byte buffer, then checks the result code, the converted bytes, and that the rest of the buffer is zero.
/// </summary>
/// <param name="utf32Input">The code point to convert.</param>
/// <param name="expectedEncodingResult">The result code the conversion must return.</param>
/// <param name="expectedUtf8Transcoding">The bytes that must appear at the start of the destination.</param>
private static void Utf32ToUtf8_Character_Test_Core(uint utf32Input, CharacterEncodingResult expectedEncodingResult, ReadOnlySpan<byte> expectedUtf8Transcoding)
{
    Span<byte> output = stackalloc byte[4];

    CharacterEncodingResult result = CharacterEncoding.ConvertCharacterUtf32ToUtf8(output, utf32Input);

    Assert.Equal(expectedEncodingResult, result);
    Assert.Equal(expectedUtf8Transcoding, output.Slice(0, expectedUtf8Transcoding.Length));

    // Every byte after the expected ones must still be zero.
    foreach (byte trailing in output.Slice(expectedUtf8Transcoding.Length))
    {
        Assert.Equal(0, trailing);
    }
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue