Remove unused Utf8ToWideCharParser (#16392)

I randomly came across this class, which I didn't even remember we had. We don't use this class at the moment and won't need it any time soon. Its current implementation is also fairly questionable. While `til::u16state` isn't "perfect", it's vastly better than this.
2025-12-19 18:11:39 -05:00 · 2023-11-30 15:52:39 +01:00
parent 0c4751ba30
commit 130c9fbd76
10 changed files with 0 additions and 1005 deletions
--- a/doc/ORGANIZATION.md
+++ b/doc/ORGANIZATION.md
@@ -125,8 +125,6 @@
 * Private calls into the Windows Window Manager to perform privileged actions related to the console process (working to eliminate) or for High DPI stuff (also working to eliminate)
 	* `Userprivapi.cpp`
 	* `Windowdpiapi.cpp`
-* New UTF8 state machine in progress to improve Bash (and other apps) support for UTF-8 in console
-	* `Utf8ToWideCharParser.cpp`
 * Window resizing/layout/management/window messaging loops and all that other stuff that has us interact with Windows to create a visual display surface and control the user interaction entry point
 	* `Window.cpp`
 	* `Windowproc.cpp`
--- a/src/host/host-common.vcxitems
+++ b/src/host/host-common.vcxitems
@@ -46,7 +46,6 @@
    <ClCompile Include="..\telemetry.cpp" />
    <ClCompile Include="..\tracing.cpp" />
    <ClCompile Include="..\utils.cpp" />
-    <ClCompile Include="..\utf8ToWideCharParser.cpp" />
    <ClCompile Include="..\VtApiRoutines.cpp" />
    <ClCompile Include="..\VtInputThread.cpp" />
    <ClCompile Include="..\VtIo.cpp" />
@@ -100,7 +99,6 @@
    <ClInclude Include="..\telemetry.hpp" />
    <ClInclude Include="..\tracing.hpp" />
    <ClInclude Include="..\utils.hpp" />
-    <ClInclude Include="..\utf8ToWideCharParser.hpp" />
    <ClInclude Include="..\VtApiRoutines.h" />
    <ClInclude Include="..\VtInputThread.hpp" />
    <ClInclude Include="..\VtIo.hpp" />
--- a/src/host/lib/hostlib.vcxproj.filters
+++ b/src/host/lib/hostlib.vcxproj.filters
@@ -111,9 +111,6 @@
    <ClCompile Include="..\conimeinfo.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="..\utf8ToWideCharParser.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
    <ClCompile Include="..\ntprivapi.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
@@ -266,9 +263,6 @@
    <ClInclude Include="..\outputStream.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="..\utf8ToWideCharParser.hpp">
-      <Filter>Header Files</Filter>
-    </ClInclude>
    <ClInclude Include="..\ApiRoutines.h">
      <Filter>Header Files</Filter>
    </ClInclude>
--- a/src/host/sources.inc
+++ b/src/host/sources.inc
@@ -84,7 +84,6 @@ SOURCES = \
    ..\writeData.cpp \
    ..\renderData.cpp \
    ..\renderFontDefaults.cpp \
-    ..\utf8ToWideCharParser.cpp \
    ..\conareainfo.cpp \
    ..\conimeinfo.cpp \
    ..\ConsoleArguments.cpp \
--- a/src/host/ut_host/Host.UnitTests.vcxproj
+++ b/src/host/ut_host/Host.UnitTests.vcxproj
@@ -28,7 +28,6 @@
    <ClCompile Include="TextBufferTests.cpp" />
    <ClCompile Include="TitleTests.cpp" />
    <ClCompile Include="UtilsTests.cpp" />
-    <ClCompile Include="Utf8ToWideCharParserTests.cpp" />
    <ClCompile Include="InputBufferTests.cpp" />
    <ClCompile Include="ViewportTests.cpp" />
    <ClCompile Include="VtIoTests.cpp" />
--- a/src/host/ut_host/Host.UnitTests.vcxproj.filters
+++ b/src/host/ut_host/Host.UnitTests.vcxproj.filters
@@ -39,9 +39,6 @@
    <ClCompile Include="..\precomp.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="Utf8ToWideCharParserTests.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
    <ClCompile Include="InitTests.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
--- a/src/host/ut_host/Utf8ToWideCharParserTests.cpp
+++ b/src/host/ut_host/Utf8ToWideCharParserTests.cpp
@@ -1,405 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-#include "precomp.h"
-#include "WexTestClass.h"
-#include "../../inc/consoletaeftemplates.hpp"
-
-#include "utf8ToWideCharParser.hpp"
-
-#define IsBitSet WI_IsFlagSet
-
-using namespace WEX::Common;
-using namespace WEX::Logging;
-using namespace WEX::TestExecution;
-using namespace std;
-
-class Utf8ToWideCharParserTests
-{
-    static const unsigned int utf8CodePage = 65001;
-    static const unsigned int USACodePage = 1252;
-
-    TEST_CLASS(Utf8ToWideCharParserTests);
-
-    TEST_METHOD(ConvertsAsciiTest)
-    {
-        Log::Comment(L"Testing that ASCII chars are correctly converted to wide chars");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        // ascii "hello"
-        const unsigned char hello[5] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f };
-        const unsigned char wideHello[10] = { 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00 };
-        unsigned int count = 5;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-
-        VERIFY_SUCCEEDED(parser.Parse(hello, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)5);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)5);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        auto pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < ARRAYSIZE(wideHello); ++i)
-        {
-            VERIFY_ARE_EQUAL(wideHello[i], pReturnedBytes[i]);
-        }
-    }
-
-    TEST_METHOD(ConvertSimpleUtf8Test)
-    {
-        Log::Comment(L"Testing that a simple UTF8 sequence can be converted");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        // U+3059, U+3057 (hiragana sushi)
-        const unsigned char sushi[6] = { 0xe3, 0x81, 0x99, 0xe3, 0x81, 0x97 };
-        const unsigned char wideSushi[4] = { 0x59, 0x30, 0x57, 0x30 };
-        unsigned int count = 6;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-
-        VERIFY_SUCCEEDED(parser.Parse(sushi, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)6);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)2);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        auto pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < ARRAYSIZE(wideSushi); ++i)
-        {
-            VERIFY_ARE_EQUAL(wideSushi[i], pReturnedBytes[i]);
-        }
-    }
-
-    TEST_METHOD(WaitsForAdditionalInputAfterPartialSequenceTest)
-    {
-        Log::Comment(L"Testing that nothing is returned when parsing a partial sequence until the sequence is complete");
-        // U+3057 (hiragana shi)
-        unsigned char shi[3] = { 0xe3, 0x81, 0x97 };
-        unsigned char wideShi[2] = { 0x57, 0x30 };
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        unsigned int count = 1;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-
-        for (auto i = 0; i < 2; ++i)
-        {
-            VERIFY_SUCCEEDED(parser.Parse(shi + i, count, consumed, output, generated));
-            VERIFY_ARE_EQUAL(consumed, (unsigned int)1);
-            VERIFY_ARE_EQUAL(generated, (unsigned int)0);
-            VERIFY_ARE_EQUAL(output.get(), nullptr);
-            count = 1;
-        }
-
-        VERIFY_SUCCEEDED(parser.Parse(shi + 2, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)1);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)1);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        auto pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < ARRAYSIZE(wideShi); ++i)
-        {
-            VERIFY_ARE_EQUAL(wideShi[i], pReturnedBytes[i]);
-        }
-    }
-
-    TEST_METHOD(ReturnsInitialPartOfSequenceThatEndsWithPartialTest)
-    {
-        Log::Comment(L"Testing that a valid portion of a sequence is returned when it ends with a partial sequence");
-        // U+3059, U+3057 (hiragana sushi)
-        const unsigned char sushi[6] = { 0xe3, 0x81, 0x99, 0xe3, 0x81, 0x97 };
-        const unsigned char wideSushi[4] = { 0x59, 0x30, 0x57, 0x30 };
-        unsigned int count = 4;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-
-        VERIFY_SUCCEEDED(parser.Parse(sushi, count, consumed, output, generated));
-        // check that we got the first wide char back
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)4);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)1);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        auto pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < 2; ++i)
-        {
-            VERIFY_ARE_EQUAL(wideSushi[i], pReturnedBytes[i]);
-        }
-
-        // add byte 2 of 3 to parser
-        count = 1;
-        consumed = 0;
-        generated = 0;
-        output.reset(nullptr);
-        VERIFY_SUCCEEDED(parser.Parse(sushi + 4, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)1);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)0);
-        VERIFY_ARE_EQUAL(output.get(), nullptr);
-
-        // add last byte
-        count = 1;
-        consumed = 0;
-        generated = 0;
-        output.reset(nullptr);
-        VERIFY_SUCCEEDED(parser.Parse(sushi + 5, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)1);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)1);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < 2; ++i)
-        {
-            VERIFY_ARE_EQUAL(wideSushi[i + 2], pReturnedBytes[i]);
-        }
-    }
-
-    TEST_METHOD(MergesMultiplePartialSequencesTest)
-    {
-        Log::Comment(L"Testing that partial sequences sent individually will be merged together");
-
-        // clang-format off
-        // (hiragana doomo arigatoo)
-        const unsigned char doomoArigatoo[24] = {
-            0xe3, 0x81, 0xa9, // U+3069
-            0xe3, 0x81, 0x86, // U+3046
-            0xe3, 0x82, 0x82, // U+3082
-            0xe3, 0x81, 0x82, // U+3042
-            0xe3, 0x82, 0x8a, // U+308A
-            0xe3, 0x81, 0x8c, // U+304C
-            0xe3, 0x81, 0xa8, // U+3068
-            0xe3, 0x81, 0x86  // U+3046
-        };
-        const unsigned char wideDoomoArigatoo[16] = {
-            0x69, 0x30,
-            0x46, 0x30,
-            0x82, 0x30,
-            0x42, 0x30,
-            0x8a, 0x30,
-            0x4c, 0x30,
-            0x68, 0x30,
-            0x46, 0x30
-        };
-        // clang-format on
-
-        // send first 4 bytes
-        unsigned int count = 4;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-
-        VERIFY_SUCCEEDED(parser.Parse(doomoArigatoo, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)4);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)1);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        auto pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < 2; ++i)
-        {
-            VERIFY_ARE_EQUAL(wideDoomoArigatoo[i], pReturnedBytes[i]);
-        }
-
-        // send next 16 bytes
-        count = 16;
-        consumed = 0;
-        generated = 0;
-        output.reset(nullptr);
-        VERIFY_SUCCEEDED(parser.Parse(doomoArigatoo + 4, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)16);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)5);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < 10; ++i)
-        {
-            VERIFY_ARE_EQUAL(wideDoomoArigatoo[i + 2], pReturnedBytes[i]);
-        }
-
-        // send last 4 bytes
-        count = 4;
-        consumed = 0;
-        generated = 0;
-        output.reset(nullptr);
-        VERIFY_SUCCEEDED(parser.Parse(doomoArigatoo + 20, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)4);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)2);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < 4; ++i)
-        {
-            VERIFY_ARE_EQUAL(wideDoomoArigatoo[i + 12], pReturnedBytes[i]);
-        }
-    }
-
-    TEST_METHOD(RemovesInvalidSequencesTest)
-    {
-        Log::Comment(L"Testing that invalid sequences are removed and don't stop the parsing of the rest");
-
-        // clang-format off
-        // hiragana sushi with junk between japanese characters
-        const unsigned char sushi[9] = {
-            0xe3, 0x81, 0x99, // U+3059
-            0x80, 0x81, 0x82, // junk continuation bytes
-            0xe3, 0x81, 0x97  // U+3057
-        };
-        // clang-format on
-
-        const unsigned char wideSushi[4] = { 0x59, 0x30, 0x57, 0x30 };
-        unsigned int count = 9;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-
-        VERIFY_SUCCEEDED(parser.Parse(sushi, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(consumed, (unsigned int)9);
-        VERIFY_ARE_EQUAL(generated, (unsigned int)2);
-        VERIFY_ARE_NOT_EQUAL(output.get(), nullptr);
-
-        auto pReturnedBytes = reinterpret_cast<unsigned char*>(output.get());
-        for (auto i = 0; i < ARRAYSIZE(wideSushi); ++i)
-        {
-            VERIFY_ARE_EQUAL(wideSushi[i], pReturnedBytes[i]);
-        }
-    }
-
-    TEST_METHOD(NonMinimalFormTest)
-    {
-        Log::Comment(L"Testing that non-minimal forms of a character are tolerated don't stop the rest");
-
-        // clang-format off
-
-        // Test data
-        const unsigned char data[] = {
-            0x60, 0x12, 0x08, 0x7f, // single byte points
-            0xc0, 0x80, // U+0000 as a 2-byte sequence (non-minimal)
-            0x41, 0x48, 0x06, 0x55, // more single byte points
-            0xe0, 0x80, 0x80, // U+0000 as a 3-byte sequence (non-minimal)
-            0x18, 0x77, 0x40, 0x31, // more single byte points
-            0xf0, 0x80, 0x80, 0x80, // U+0000 as a 4-byte sequence (non-minimal)
-            0x59, 0x1f, 0x68, 0x20 // more single byte points
-        };
-
-        // Expected conversion
-        const wchar_t wideData[] = {
-            0x0060, 0x0012, 0x0008, 0x007f,
-            0xfffd, 0xfffd, // The number of replacements per invalid sequence is not intended to be load-bearing
-            0x0041, 0x0048, 0x0006, 0x0055,
-            0xfffd, 0xfffd, // It is just representative of what it looked like when fixing this for GH#3380
-            0x0018, 0x0077, 0x0040, 0x0031,
-            0xfffd, 0xfffd, 0xfffd, // Change if necessary when completing GH#3378
-            0x0059, 0x001f, 0x0068, 0x0020
-        };
-
-        // clang-format on
-
-        const auto count = gsl::narrow_cast<unsigned int>(ARRAYSIZE(data));
-        const auto wideCount = gsl::narrow_cast<unsigned int>(ARRAYSIZE(wideData));
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-
-        VERIFY_SUCCEEDED(parser.Parse(data, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(count, consumed);
-        VERIFY_ARE_EQUAL(wideCount, generated);
-        VERIFY_IS_NOT_NULL(output.get());
-
-        const auto expected = WEX::Common::String(wideData, wideCount);
-        const auto actual = WEX::Common::String(output.get(), generated);
-        VERIFY_ARE_EQUAL(expected, actual);
-    }
-
-    TEST_METHOD(PartialBytesAreDroppedOnCodePageChangeTest)
-    {
-        Log::Comment(L"Testing that a saved partial sequence is cleared when the codepage changes");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        // 2 bytes of a 4 byte sequence
-        const unsigned int inputSize = 2;
-        const unsigned char partialSequence[inputSize] = { 0xF0, 0x80 };
-        auto count = inputSize;
-        unsigned int consumed = 0;
-        unsigned int generated = 0;
-        unique_ptr<wchar_t[]> output{ nullptr };
-        VERIFY_SUCCEEDED(parser.Parse(partialSequence, count, consumed, output, generated));
-        VERIFY_ARE_EQUAL(parser._currentState, Utf8ToWideCharParser::_State::BeginPartialParse);
-        VERIFY_ARE_EQUAL(parser._bytesStored, inputSize);
-        // set the codepage to the same one it currently is, ensure
-        // that nothing changes
-        parser.SetCodePage(utf8CodePage);
-        VERIFY_ARE_EQUAL(parser._currentState, Utf8ToWideCharParser::_State::BeginPartialParse);
-        VERIFY_ARE_EQUAL(parser._bytesStored, inputSize);
-        // change to a different codepage, ensure parser is reset
-        parser.SetCodePage(USACodePage);
-        VERIFY_ARE_EQUAL(parser._currentState, Utf8ToWideCharParser::_State::Ready);
-        VERIFY_ARE_EQUAL(parser._bytesStored, (unsigned int)0);
-    }
-
-    TEST_METHOD(_IsLeadByteTest)
-    {
-        Log::Comment(L"Testing that _IsLeadByte properly differentiates correct from incorrect sequences");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        VERIFY_IS_TRUE(parser._IsLeadByte(0xC0)); // 2 byte sequence
-        VERIFY_IS_TRUE(parser._IsLeadByte(0xE0)); // 3 byte sequence
-        VERIFY_IS_TRUE(parser._IsLeadByte(0xF0)); // 4 byte sequence
-        VERIFY_IS_FALSE(parser._IsLeadByte(0x00)); // ASCII char NUL
-        VERIFY_IS_FALSE(parser._IsLeadByte(0x80)); // continuation byte
-        VERIFY_IS_FALSE(parser._IsLeadByte(0x83)); // continuation byte
-        VERIFY_IS_FALSE(parser._IsLeadByte(0x7E)); // ASCII char '~'
-        VERIFY_IS_FALSE(parser._IsLeadByte(0x21)); // ASCII char '!'
-        VERIFY_IS_FALSE(parser._IsLeadByte(0xF8)); // invalid 5 byte sequence
-        VERIFY_IS_FALSE(parser._IsLeadByte(0xFC)); // invalid 6 byte sequence
-        VERIFY_IS_FALSE(parser._IsLeadByte(0xFE)); // invalid 7 byte sequence
-        VERIFY_IS_FALSE(parser._IsLeadByte(0xFF)); // all 1's
-    }
-
-    TEST_METHOD(_IsContinuationByteTest)
-    {
-        Log::Comment(L"Testing that _IsContinuationByte properly differentiates correct from incorrect sequences");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        for (BYTE i = 0x00; i < 0xFF; ++i)
-        {
-            if (IsBitSet(i, 0x80) && !IsBitSet(i, 0x40))
-            {
-                VERIFY_IS_TRUE(parser._IsContinuationByte(i), NoThrowString().Format(L"Byte is 0x%02x", i));
-            }
-            else
-            {
-                VERIFY_IS_FALSE(parser._IsContinuationByte(i), NoThrowString().Format(L"Byte is 0x%02x", i));
-            }
-        }
-        VERIFY_IS_FALSE(parser._IsContinuationByte(0xFF));
-    }
-
-    TEST_METHOD(_IsAsciiByteTest)
-    {
-        Log::Comment(L"Testing that _IsAsciiByte properly differentiates correct from incorrect sequences");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        for (BYTE i = 0x00; i < 0x80; ++i)
-        {
-            VERIFY_IS_TRUE(parser._IsAsciiByte(i), NoThrowString().Format(L"Byte is 0x%02x", i));
-        }
-        for (BYTE i = 0xFF; i > 0x7F; --i)
-        {
-            VERIFY_IS_FALSE(parser._IsAsciiByte(i), NoThrowString().Format(L"Byte is 0x%02x", i));
-        }
-    }
-
-    TEST_METHOD(_Utf8SequenceSizeTest)
-    {
-        Log::Comment(L"Testing that _Utf8SequenceSize correctly counts the number of MSB 1's");
-        auto parser = Utf8ToWideCharParser{ utf8CodePage };
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0x00), (unsigned int)0);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0x80), (unsigned int)1);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xC2), (unsigned int)2);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xE3), (unsigned int)3);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xF0), (unsigned int)4);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xF3), (unsigned int)4);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xF8), (unsigned int)5);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xFC), (unsigned int)6);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xFD), (unsigned int)6);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xFE), (unsigned int)7);
-        VERIFY_ARE_EQUAL(parser._Utf8SequenceSize(0xFF), (unsigned int)8);
-    }
-};
--- a/src/host/ut_host/sources
+++ b/src/host/ut_host/sources
@@ -27,7 +27,6 @@ SOURCES = \
    TextBufferTests.cpp \
    ClipboardTests.cpp \
    SelectionTests.cpp \
-    Utf8ToWideCharParserTests.cpp \
    OutputCellIteratorTests.cpp \
    InitTests.cpp \
    TitleTests.cpp \
--- a/src/host/utf8ToWideCharParser.cpp
+++ b/src/host/utf8ToWideCharParser.cpp
@@ -1,520 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-#include "precomp.h"
-
-#include "utf8ToWideCharParser.hpp"
-#include <unicode.hpp>
-
-#ifndef WIL_ENABLE_EXCEPTIONS
-#error WIL exception helpers must be enabled
-#endif
-
-#define IsBitSet WI_IsFlagSet
-
-const byte NonAsciiBytePrefix = 0x80;
-
-const byte ContinuationByteMask = 0xC0;
-const byte ContinuationBytePrefix = 0x80;
-
-const byte MostSignificantBitMask = 0x80;
-
-// Routine Description:
-// - Constructs an instance of the parser.
-// Arguments:
-// - codePage - Starting code page to interpret input with.
-// Return Value:
-// - A new instance of the parser.
-Utf8ToWideCharParser::Utf8ToWideCharParser(const unsigned int codePage) :
-    _currentCodePage{ codePage },
-    _bytesStored{ 0 },
-    _currentState{ _State::Ready },
-    _convertedWideChars{ nullptr }
-{
-    std::fill_n(_utf8CodePointPieces, _UTF8_BYTE_SEQUENCE_MAX, 0ui8);
-}
-
-// Routine Description:
-// - Set the code page that input sequences will correspond to. Clears
-// any saved partial multi-byte sequences if the code page changes
-// from the code page the partial sequence is associated with.
-// Arguments:
-// - codePage - the code page to set to.
-// Return Value:
-// - <none>
-void Utf8ToWideCharParser::SetCodePage(const unsigned int codePage)
-{
-    if (_currentCodePage != codePage)
-    {
-        _currentCodePage = codePage;
-        // we can't be making any assumptions about the partial
-        // sequence we were storing now that the codepage has changed
-        _bytesStored = 0;
-        _currentState = _State::Ready;
-    }
-}
-
-// Routine Description:
-// - Parses the input multi-byte sequence.
-// Arguments:
-// - pBytes - The byte sequence to parse.
-// - cchBuffer - The amount of bytes in pBytes. This will contain the
-// number of wide chars contained by converted after this function is
-// run, or 0 if an error occurs (or if pBytes is 0).
-// - converted - a valid unique_ptr to store the parsed wide chars
-// in. On error this will contain nullptr instead of an array.
-// Return Value:
-// - <none>
-[[nodiscard]] HRESULT Utf8ToWideCharParser::Parse(_In_reads_(cchBuffer) const byte* const pBytes,
-                                                  _In_ const unsigned int cchBuffer,
-                                                  _Out_ unsigned int& cchConsumed,
-                                                  _Inout_ std::unique_ptr<wchar_t[]>& converted,
-                                                  _Out_ unsigned int& cchConverted)
-{
-    cchConsumed = 0;
-    cchConverted = 0;
-
-    // we can't parse anything if we weren't given any data to parse
-    if (cchBuffer == 0)
-    {
-        return S_OK;
-    }
-    // we shouldn't be parsing if the current codepage isn't UTF8
-    if (_currentCodePage != CP_UTF8)
-    {
-        _currentState = _State::Error;
-    }
-    auto hr = S_OK;
-    try
-    {
-        auto loop = true;
-        unsigned int wideCharCount = 0;
-        _convertedWideChars.reset(nullptr);
-        while (loop)
-        {
-            switch (_currentState)
-            {
-            case _State::Ready:
-                wideCharCount = _ParseFullRange(pBytes, cchBuffer);
-                break;
-            case _State::BeginPartialParse:
-                wideCharCount = _InvolvedParse(pBytes, cchBuffer);
-                break;
-            case _State::Error:
-                hr = E_FAIL;
-                _Reset();
-                wideCharCount = 0;
-                loop = false;
-                break;
-            case _State::Finished:
-                _currentState = _State::Ready;
-                cchConsumed = cchBuffer;
-                loop = false;
-                break;
-            case _State::AwaitingMoreBytes:
-                _currentState = _State::BeginPartialParse;
-                cchConsumed = cchBuffer;
-                loop = false;
-                break;
-            default:
-                _currentState = _State::Error;
-                break;
-            }
-        }
-        converted.swap(_convertedWideChars);
-        cchConverted = wideCharCount;
-    }
-    catch (...)
-    {
-        _Reset();
-        hr = wil::ResultFromCaughtException();
-    }
-    return hr;
-}
-
-// Routine Description:
-// - Determines if ch is a UTF8 lead byte. See _Utf8SequenceSize() for a
-// description of how a lead byte is specified.
-// Arguments:
-// - ch - The byte to test.
-// Return Value:
-// - True if ch is a lead byte, false otherwise.
-bool Utf8ToWideCharParser::_IsLeadByte(_In_ byte ch)
-{
-    auto sequenceSize = _Utf8SequenceSize(ch);
-    return !_IsContinuationByte(ch) &&
-           !_IsAsciiByte(ch) &&
-           sequenceSize > 1 &&
-           sequenceSize <= _UTF8_BYTE_SEQUENCE_MAX;
-}
-
-// Routine Description:
-// - Determines if ch is a UTF8 continuation byte. A continuation byte
-// takes the form 10xx xxxx, so we need to check that the two most
-// significant bits are a 1 followed by a 0.
-// Arguments:
-// - ch - The byte to test
-// Return Value:
-// - True if ch is a continuation byte, false otherwise.
-bool Utf8ToWideCharParser::_IsContinuationByte(_In_ byte ch)
-{
-    return (ch & ContinuationByteMask) == ContinuationBytePrefix;
-}
-
-// Routine Description:
-// - Determines if ch is an ASCII compatible UTF8 byte. A byte is
-// ASCII compatible if the most significant bit is a 0.
-// Arguments:
-// - ch - The byte to test.
-// Return Value:
-// - True if ch is an ASCII compatible byte, false otherwise.
-bool Utf8ToWideCharParser::_IsAsciiByte(_In_ byte ch)
-{
-    return !IsBitSet(ch, NonAsciiBytePrefix);
-}
-
-// Routine Description:
-// - Determines if the sequence starting at pLeadByte is a valid UTF8
-// multi-byte sequence. Note that a single ASCII byte does not count
-// as a valid MULTI-byte sequence.
-// Arguments:
-// - pLeadByte - The start of a possible sequence.
-// - cb - The amount of remaining chars in the array that
-// pLeadByte points to.
-// Return Value:
-// - true if the sequence starting at pLeadByte is a multi-byte
-// sequence and uses all of the remaining chars, false otherwise.
-bool Utf8ToWideCharParser::_IsValidMultiByteSequence(_In_reads_(cb) const byte* const pLeadByte, const unsigned int cb)
-{
-    if (!_IsLeadByte(*pLeadByte))
-    {
-        return false;
-    }
-    const auto sequenceSize = _Utf8SequenceSize(*pLeadByte);
-    if (sequenceSize > cb)
-    {
-        return false;
-    }
-    // i starts at 1 so that we skip the lead byte
-    for (unsigned int i = 1; i < sequenceSize; ++i)
-    {
-        const auto ch = *(pLeadByte + i);
-        if (!_IsContinuationByte(ch))
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-// Routine Description:
-// - Checks if the sequence starting at pLeadByte is a portion of a
-// single valid multi-byte sequence. A new sequence must not be
-// started within the range provided in order for it to be considered
-// a valid partial sequence.
-// Arguments:
-// - pLeadByte - The start of the possible partial sequence.
-// - cb - The amount of remaining chars in the array that
-// pLeadByte points to.
-// Return Value:
-// - true if the sequence is a single partial multi-byte sequence,
-// false otherwise.
-bool Utf8ToWideCharParser::_IsPartialMultiByteSequence(_In_reads_(cb) const byte* const pLeadByte, const unsigned int cb)
-{
-    if (!_IsLeadByte(*pLeadByte))
-    {
-        return false;
-    }
-    const auto sequenceSize = _Utf8SequenceSize(*pLeadByte);
-    if (sequenceSize <= cb)
-    {
-        return false;
-    }
-    // i starts at 1 so that we skip the lead byte
-    for (unsigned int i = 1; i < cb; ++i)
-    {
-        const auto ch = *(pLeadByte + i);
-        if (!_IsContinuationByte(ch))
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-// Routine Description:
-// - Determines the number of bytes in the UTF8 multi-byte sequence.
-// Does not perform any verification that ch is a valid lead byte. A
-// lead byte indicates how many bytes are in a sequence by repeating a
-// 1 for each byte in the sequence, starting with the most significant
-// bit, then a 0 directly after. Ex:
-// - 110x xxxx = a two byte sequence
-// - 1110 xxxx = a three byte sequence
-//
-// Note that a byte that has a pattern 10xx xxxx is a continuation
-// byte and will be reported as a sequence of one by this function.
-//
-// A sequence is currently a maximum of four bytes but this function
-// will just count the number of consecutive 1 bits (starting with the
-// most significant bit) so if the byte is malformed (ex. 1111 110x) a
-// number larger than the maximum utf8 byte sequence may be
-// returned. It is the responsibility of the calling function to check
-// this (and the continuation byte scenario) because we don't do any
-// verification here.
-// Arguments:
-// - ch - the lead byte of a UTF8 multi-byte sequence.
-// Return Value:
-// - The number of bytes (including the lead byte) that ch indicates
-// are in the sequence.
-unsigned int Utf8ToWideCharParser::_Utf8SequenceSize(_In_ byte ch)
-{
-    unsigned int msbOnes = 0;
-    while (IsBitSet(ch, MostSignificantBitMask))
-    {
-        ++msbOnes;
-        ch <<= 1;
-    }
-    return msbOnes;
-}
-
-// Routine Description:
-// - Attempts to convert pInputChars to wide chars on their own,
-// without using any saved partial byte sequences. On success,
-// _convertedWideChars will contain the converted wide char sequence
-// and _currentState will be set to _State::Finished. On failure,
-// _currentState will be set to _State::BeginPartialParse when the input
-// has no Unicode translation, or to _State::Error for any other failure.
-// Arguments:
-// - pInputChars - The byte sequence to convert to wide chars.
-// - cb - The number of bytes in pInputChars.
-// Return Value:
-// - The number of wide chars that are stored in _convertedWideChars,
-// or 0 if pInputChars cannot be successfully converted.
-unsigned int Utf8ToWideCharParser::_ParseFullRange(_In_reads_(cb) const byte* const pInputChars, const unsigned int cb)
-{
-    auto bufferSize = MultiByteToWideChar(_currentCodePage, // sizing pass: no output buffer is supplied
-                                          MB_ERR_INVALID_CHARS, // fail on invalid input instead of converting it
-                                          reinterpret_cast<LPCCH>(pInputChars),
-                                          cb,
-                                          nullptr,
-                                          0);
-    if (bufferSize == 0)
-    {
-        auto err = GetLastError(); // captured before logging so the value compared below is the one logged
-        LOG_WIN32(err);
-        if (err == ERROR_NO_UNICODE_TRANSLATION)
-        {
-            _currentState = _State::BeginPartialParse; // not a clean byte sequence; the involved parse is needed
-        }
-        else
-        {
-            _currentState = _State::Error;
-        }
-    }
-    else
-    {
-        _convertedWideChars = std::make_unique<wchar_t[]>(bufferSize); // sized by the first pass
-        bufferSize = MultiByteToWideChar(_currentCodePage, // conversion pass into the new buffer
-                                         0, // the strict MB_ERR_INVALID_CHARS check was already done by the sizing pass
-                                         reinterpret_cast<LPCCH>(pInputChars),
-                                         cb,
-                                         _convertedWideChars.get(),
-                                         bufferSize);
-        if (bufferSize == 0)
-        {
-            LOG_LAST_ERROR();
-            _currentState = _State::Error;
-        }
-        else
-        {
-            _currentState = _State::Finished;
-        }
-    }
-    return bufferSize; // 0 on every failure path above
-}
-
-// Routine Description:
-// - Converts pInputChars to wide chars the thorough way: any saved
-// partial byte sequence is prepended to the input, invalid byte
-// sequences are removed, and a partial sequence found during that
-// filtering is saved for the next call. If the bytes are converted
-// and nothing was saved, _currentState will be set to
-// _State::Finished. If a partial sequence was saved (with or without
-// converted output), _currentState will be set to
-// _State::AwaitingMoreBytes. Otherwise, _currentState will be set to
-// _State::Error.
-// Arguments:
-// - pInputChars - The byte sequence to convert to wide chars.
-// - cb - The number of bytes in pInputChars.
-// Return Value:
-// - The number of wide chars that are stored in _convertedWideChars,
-// or 0 if pInputChars cannot be successfully converted or if the
-// parser requires additional bytes before returning a valid wide
-// char.
-unsigned int Utf8ToWideCharParser::_InvolvedParse(_In_reads_(cb) const byte* const pInputChars, const unsigned int cb)
-{
-    // Do safe math to add up the count and error if it won't fit.
-    unsigned int count;
-    const auto hr = UIntAdd(cb, _bytesStored, &count);
-    if (FAILED(hr))
-    {
-        LOG_HR(hr);
-        _currentState = _State::Error;
-        return 0;
-    }
-
-    // Allocate space and copy.
-    auto combinedInputBytes = std::make_unique<byte[]>(count);
-    std::copy(_utf8CodePointPieces, _utf8CodePointPieces + _bytesStored, combinedInputBytes.get()); // saved bytes go first
-    std::copy(pInputChars, pInputChars + cb, combinedInputBytes.get() + _bytesStored); // new input follows them
-    _bytesStored = 0; // saved bytes are now consumed; _RemoveInvalidSequences may store a new partial sequence
-    auto validSequence = _RemoveInvalidSequences(combinedInputBytes.get(), count); // pair of (filtered bytes, byte count)
-    // the input may have only been a partial sequence so we need to
-    // check that there are actually any bytes that we can convert
-    // right now
-    if (validSequence.second == 0 && _bytesStored > 0)
-    {
-        _currentState = _State::AwaitingMoreBytes;
-        return 0;
-    }
-
-    // By this point, all obviously invalid sequences have been removed.
-    // But non-minimal forms of sequences might still exist.
-    // MB2WC will fail non-minimal forms with MB_ERR_INVALID_CHARS at this point.
-    // So we call with flags = 0 such that non-minimal forms get the U+FFFD
-    // replacement character treatment.
-    // This issue and related concerns are fully captured in future work item GH#3378
-    // for future cleanup and reconciliation.
-    // The original issue introducing this was GH#3320.
-    auto bufferSize = MultiByteToWideChar(_currentCodePage, // sizing pass: no output buffer is supplied
-                                          0,
-                                          reinterpret_cast<LPCCH>(validSequence.first.get()),
-                                          validSequence.second,
-                                          nullptr,
-                                          0);
-    if (bufferSize == 0) // NOTE(review): presumably also hit when validSequence.second == 0 with nothing stored - confirm Error is intended
-    {
-        LOG_LAST_ERROR();
-        _currentState = _State::Error;
-    }
-    else
-    {
-        _convertedWideChars = std::make_unique<wchar_t[]>(bufferSize); // sized by the first pass
-        bufferSize = MultiByteToWideChar(_currentCodePage, // conversion pass into the new buffer
-                                         0,
-                                         reinterpret_cast<LPCCH>(validSequence.first.get()),
-                                         validSequence.second,
-                                         _convertedWideChars.get(),
-                                         bufferSize);
-        if (bufferSize == 0)
-        {
-            LOG_LAST_ERROR();
-            _currentState = _State::Error;
-        }
-        else if (_bytesStored > 0) // output was produced, but a trailing partial sequence is still saved
-        {
-            _currentState = _State::AwaitingMoreBytes;
-        }
-        else
-        {
-            _currentState = _State::Finished;
-        }
-    }
-    return bufferSize;
-}
-
-// Routine Description:
-// - Copies the valid parts of pInputChars into a new buffer, dropping invalid UTF8
-// sequences; a partial sequence is handed to _StorePartialSequence and ends the scan.
-// Arguments:
-// - pInputChars - The byte sequence to filter.
-// - cb - The number of bytes in pInputChars.
-// Return Value:
-// - A std::pair holding the filtered byte sequence (allocated with cb bytes) and the
-// number of bytes actually written to it.
-std::pair<std::unique_ptr<byte[]>, unsigned int> Utf8ToWideCharParser::_RemoveInvalidSequences(_In_reads_(cb) const byte* const pInputChars, const unsigned int cb)
-{
-    auto validSequence = std::make_unique<byte[]>(cb); // output is never longer than the input
-    unsigned int validSequenceLocation = 0; // index into validSequence
-    unsigned int currentByteInput = 0; // index into pInputChars
-    while (currentByteInput < cb)
-    {
-        if (_IsAsciiByte(pInputChars[currentByteInput]))
-        {
-            validSequence[validSequenceLocation] = pInputChars[currentByteInput]; // ASCII bytes are kept as-is
-            ++validSequenceLocation;
-            ++currentByteInput;
-        }
-        else if (_IsContinuationByte(pInputChars[currentByteInput]))
-        {
-            while (currentByteInput < cb && _IsContinuationByte(pInputChars[currentByteInput])) // drop the whole run with no lead byte
-            {
-                ++currentByteInput;
-            }
-        }
-        else if (_IsLeadByte(pInputChars[currentByteInput]))
-        {
-            if (_IsValidMultiByteSequence(&pInputChars[currentByteInput], cb - currentByteInput))
-            {
-                const auto sequenceSize = _Utf8SequenceSize(pInputChars[currentByteInput]);
-                // min is to guard against static analysis possible buffer overflow
-                const auto limit = std::min(sequenceSize, cb - currentByteInput);
-                for (unsigned int i = 0; i < limit; ++i) // copy the lead byte and the bytes that follow it
-                {
-                    validSequence[validSequenceLocation] = pInputChars[currentByteInput];
-                    ++validSequenceLocation;
-                    ++currentByteInput;
-                }
-            }
-            else if (_IsPartialMultiByteSequence(&pInputChars[currentByteInput], cb - currentByteInput))
-            {
-                _StorePartialSequence(&pInputChars[currentByteInput], cb - currentByteInput); // saves all remaining input (up to the storage limit)
-                break; // nothing after this point is examined
-            }
-            else
-            {
-                ++currentByteInput; // skip the bad lead byte...
-                while (currentByteInput < cb && _IsContinuationByte(pInputChars[currentByteInput])) // ...and the continuation bytes after it
-                {
-                    ++currentByteInput;
-                }
-            }
-        }
-        else
-        {
-            // invalid byte, skip it.
-            ++currentByteInput;
-        }
-    }
-    return std::make_pair<std::unique_ptr<byte[]>, unsigned int>(std::move(validSequence), std::move(validSequenceLocation)); // buffer ownership moves to the caller
-}
-
-// Routine Description:
-// - Copies a partial byte sequence into _utf8CodePointPieces for later use,
-// replacing any previously saved sequence. At most
-// Utf8ToWideCharParser::_UTF8_BYTE_SEQUENCE_MAX bytes are stored; any extra are ignored.
-// Arguments:
-// - pLeadByte - The beginning of the sequence to save.
-// - cb - The number of bytes to save.
-// Return Value:
-// - <none>
-void Utf8ToWideCharParser::_StorePartialSequence(_In_reads_(cb) const byte* const pLeadByte, const unsigned int cb)
-{
-    const auto maxLength = std::min(cb, _UTF8_BYTE_SEQUENCE_MAX); // clamp so the copy cannot overrun the storage array
-    std::copy(pLeadByte, pLeadByte + maxLength, _utf8CodePointPieces);
-    _bytesStored = maxLength; // records how many of the stored bytes are meaningful
-}
-
-// Routine Description:
-// - Returns the parser to _State::Ready, discarding any saved partial
-// sequence and any converted output. _currentCodePage is not affected.
-// Arguments:
-// - <none>
-// Return Value:
-// - <none>
-void Utf8ToWideCharParser::_Reset()
-{
-    _currentState = _State::Ready;
-    _bytesStored = 0; // the bytes in _utf8CodePointPieces are left in place but no longer counted
-    _convertedWideChars.reset(nullptr); // releases the previous conversion output
-}
--- a/src/host/utf8ToWideCharParser.hpp
+++ b/src/host/utf8ToWideCharParser.hpp
@@ -1,64 +0,0 @@
-/*++
-Copyright (c) Microsoft Corporation
-Licensed under the MIT license.
-
-Module Name:
- utf8ToWideCharParser.hpp
-
-Abstract:
- This transforms a multi-byte character sequence into wide chars
- It will attempt to work around invalid byte sequences
- Partial byte sequences are supported
-
-Author(s):
- Austin Diviness (AustDi) 16-August-2016
--*/
-
-#pragma once
-
-class Utf8ToWideCharParser final // converts multi-byte input to wide chars, keeping partial sequences between calls
-{
-public:
-    Utf8ToWideCharParser(const unsigned int codePage);
-    void SetCodePage(const unsigned int codePage);
-    [[nodiscard]] HRESULT Parse(_In_reads_(cchBuffer) const byte* const pBytes,
-                                _In_ const unsigned int cchBuffer,
-                                _Out_ unsigned int& cchConsumed,
-                                _Inout_ std::unique_ptr<wchar_t[]>& converted,
-                                _Out_ unsigned int& cchConverted);
-
-private:
-    enum class _State
-    {
-        Ready, // ready for input, no partially parsed code points
-        Error, // error in parsing given bytes
-        BeginPartialParse, // not a clean byte sequence, needs involved parsing
-        AwaitingMoreBytes, // have a partial sequence saved, waiting for the rest of it
-        Finished // ready to return a wide char sequence
-    };
-
-    bool _IsLeadByte(_In_ byte ch);
-    bool _IsContinuationByte(_In_ byte ch);
-    bool _IsAsciiByte(_In_ byte ch);
-    bool _IsValidMultiByteSequence(_In_reads_(cb) const byte* const pLeadByte, const unsigned int cb);
-    bool _IsPartialMultiByteSequence(_In_reads_(cb) const byte* const pLeadByte, const unsigned int cb);
-    unsigned int _Utf8SequenceSize(_In_ byte ch); // counts the leading set bits of ch; performs no validation
-    unsigned int _ParseFullRange(_In_reads_(cb) const byte* const _InputChars, const unsigned int cb); // NOTE(review): the definition names this parameter pInputChars
-    unsigned int _InvolvedParse(_In_reads_(cb) const byte* const pInputChars, const unsigned int cb); // conversion path that uses saved bytes and filters invalid ones
-    std::pair<std::unique_ptr<byte[]>, unsigned int> _RemoveInvalidSequences(_In_reads_(cb) const byte* const pInputChars, // returns (filtered bytes, count)
-                                                                             const unsigned int cb);
-    void _StorePartialSequence(_In_reads_(cb) const byte* const pLeadByte, const unsigned int cb); // fills _utf8CodePointPieces and _bytesStored
-    void _Reset(); // back to _State::Ready; the code page is kept
-
-    static const unsigned int _UTF8_BYTE_SEQUENCE_MAX = 4; // NOTE(review): bound by reference in std::min (_StorePartialSequence) - confirm a definition exists or make it constexpr
-
-    byte _utf8CodePointPieces[_UTF8_BYTE_SEQUENCE_MAX]; // storage for a saved partial sequence
-    unsigned int _bytesStored; // bytes stored in utf8CodePointPieces
-    unsigned int _currentCodePage; // code page passed to MultiByteToWideChar
-    std::unique_ptr<wchar_t[]> _convertedWideChars; // output of the most recent successful conversion
-    _State _currentState; // set by the parse helpers to report how the last step ended
-
-#ifdef UNIT_TESTING
-    friend class Utf8ToWideCharParserTests;
-#endif
-};