From f20cd3a9d358f28650a69f9cd56eae977aa2543c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 24 Mar 2023 23:20:53 +0100 Subject: [PATCH] Add an efficient text stream write function (#14821) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds PR adds a couple foundational functions and classes to make our TextBuffer more performant and allow us to improve our Unicode correctness in the future, by getting rid of our dependence on `OutputCellIterator`. In the future we can then replace the simple UTF-16 code point iterator with a proper grapheme cluster iterator. While my focus is technically on Unicode correctness, the ~4x VT throughput increase in OpenConsole is pretty nice too. This PR adds: * A new, simpler ROW iterator (unused in this PR) * Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`) They're based on functions that align the cursor to the start/end of the _current_ cell, so such functions can be added as well. * `ReplaceText` to write a raw string of text with the possibility to specify a right margin. * `CopyRangeFrom` will allow us to make reflow much faster, as it's able to bulk-copy already measured strings without re-measuring them. Related to #8000 ## Validation Steps Performed * enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper wide glyph reflow at the end of a row ✅ * This produces "a 咪" where only "a" has a white background: ```sh printf '\e7こん\e8\x1b[107ma\x1b[m\n' ``` * This produces "abん": ```sh stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n' ``` * This produces "xy" at the end of the line: ```sh stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n' ``` * This produces red whitespace followed by "こ " in the default background color at the end of the line, and "ん" on the next line: ```sh printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n' ``` --- .github/actions/spelling/expect/alphabet.txt | 1 + .github/actions/spelling/expect/expect.txt | 2 - src/buffer/out/Row.cpp | 367 +++++++++++++++---- src/buffer/out/Row.hpp | 80 +++- src/buffer/out/precomp.h | 42 +-- src/buffer/out/textBuffer.cpp | 26 ++ src/buffer/out/textBuffer.hpp | 3 + src/cascadia/LocalTests_SettingsModel/pch.h | 5 +- src/host/ut_host/ScreenBufferTests.cpp | 10 + src/host/ut_host/TextBufferTests.cpp | 82 +++++ src/inc/LibraryIncludes.h | 18 +- src/inc/test/CommonState.hpp | 43 +-- src/inc/til/point.h | 2 + src/inc/til/rle.h | 9 + src/inc/til/unicode.h | 24 ++ src/terminal/adapter/adaptDispatch.cpp | 72 ++-- src/til/ut_til/RunLengthEncodingTests.cpp | 2 +- tools/ConsoleTypes.natvis | 27 +- 18 files changed, 582 insertions(+), 233 deletions(-) diff --git a/.github/actions/spelling/expect/alphabet.txt b/.github/actions/spelling/expect/alphabet.txt index 23933713a4..68b264a0b1 100644 --- a/.github/actions/spelling/expect/alphabet.txt +++ b/.github/actions/spelling/expect/alphabet.txt @@ -18,6 +18,7 @@ BBBBBBBB BBBBBCCC BBBBCCCCC BBGGRR +efg EFG EFGh QQQQQQQQQQABCDEFGHIJ diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index f73deba781..6acfcf2170 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -2280,8 +2280,6 @@ xunit xutr XVIRTUALSCREEN XWalk -xwwyzz -xxyyzz yact YCast YCENTER diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 7943c0c98a..38a82857cf 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -4,7 +4,10 @@ #include "precomp.h" #include "Row.hpp" +#include + #include "textBuffer.hpp" +#include "../../types/inc/GlyphWidth.hpp" // The STL is missing a std::iota_n analogue for std::iota, so I made my own. template @@ -229,6 +232,40 @@ void ROW::TransferAttributes(const til::small_rle& a _attr.resize_trailing_extent(gsl::narrow(newWidth)); } +// Returns the previous possible cursor position, preceding the given column. +// Returns 0 if column is less than or equal to 0. +til::CoordType ROW::NavigateToPrevious(til::CoordType column) const noexcept +{ + return _adjustBackward(_clampedColumn(column - 1)); +} + +// Returns the next possible cursor position, following the given column. +// Returns the row width if column is beyond the width of the row. +til::CoordType ROW::NavigateToNext(til::CoordType column) const noexcept +{ + return _adjustForward(_clampedColumn(column + 1)); +} + +uint16_t ROW::_adjustBackward(uint16_t column) const noexcept +{ + // Safety: This is a little bit more dangerous. The first column is supposed + // to never be a trailer and so this loop should exit if column == 0. + for (; _uncheckedIsTrailer(column); --column) + { + } + return column; +} + +uint16_t ROW::_adjustForward(uint16_t column) const noexcept +{ + // Safety: This is a little bit more dangerous. The last column is supposed + // to never be a trailer and so this loop should exit if column == _columnCount. + for (; _uncheckedIsTrailer(column); ++column) + { + } + return column; +} + // Routine Description: // - clears char data in column in row // Arguments: @@ -375,90 +412,245 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp _attr.replace(_clampedColumnInclusive(beginIndex), _clampedColumnInclusive(endIndex), newAttr); } -void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars) +[[msvc::forceinline]] ROW::WriteHelper::WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept : + row{ row }, + chars{ chars } { - const auto colBeg = _clampedUint16(columnBegin); - const auto colEnd = _clampedUint16(columnBegin + width); + colBeg = row._clampedColumnInclusive(columnBegin); + colLimit = row._clampedColumnInclusive(columnLimit); + chBegDirty = row._uncheckedCharOffset(colBeg); + colBegDirty = row._adjustBackward(colBeg); + leadingSpaces = colBeg - colBegDirty; + chBeg = chBegDirty + leadingSpaces; + colEnd = colBeg; + colEndDirty = 0; + charsConsumed = 0; +} - if (colBeg >= colEnd || colEnd > _columnCount || chars.empty()) +[[msvc::forceinline]] bool ROW::WriteHelper::IsValid() const noexcept +{ + return colBeg < colLimit && !chars.empty(); +} + +void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars) +try +{ + WriteHelper h{ *this, columnBegin, _columnCount, chars }; + if (!h.IsValid()) { return; } + h.ReplaceCharacters(width); + h.Finish(); +} +catch (...) +{ + // Due to this function writing _charOffsets first, then calling _resizeChars (which may throw) and only then finally + // filling in _chars, we might end up in a situation were _charOffsets contains offsets outside of the _chars array. + // --> Restore this row to a known "okay"-state. + Reset(TextAttribute{}); + throw; +} - // Safety: - // * colBeg is now [0, _columnCount) - // * colEnd is now (colBeg, _columnCount] +[[msvc::forceinline]] void ROW::WriteHelper::ReplaceCharacters(til::CoordType width) noexcept +{ + const auto colEndNew = gsl::narrow_cast(colEnd + width); + if (colEndNew > colLimit) + { + colEndDirty = colLimit; + } + else + { + til::at(row._charOffsets, colEnd++) = chBeg; + for (; colEnd < colEndNew; ++colEnd) + { + til::at(row._charOffsets, colEnd) = gsl::narrow_cast(chBeg | CharOffsetsTrailer); + } - // Algorithm explanation + colEndDirty = colEnd; + charsConsumed = chars.size(); + } +} + +void ROW::ReplaceText(RowWriteState& state) +try +{ + WriteHelper h{ *this, state.columnBegin, state.columnLimit, state.text }; + if (!h.IsValid()) + { + state.columnEnd = h.colBeg; + state.columnBeginDirty = h.colBeg; + state.columnEndDirty = h.colBeg; + return; + } + h.ReplaceText(); + h.Finish(); + + state.text = state.text.substr(h.charsConsumed); + // Here's why we set `state.columnEnd` to `colLimit` if there's remaining text: + // Callers should be able to use `state.columnEnd` as the next cursor position, as well as the parameter for a + // follow-up call to ReplaceAttributes(). But if we fail to insert a wide glyph into the last column of a row, + // that last cell (which now contains padding whitespace) should get the same attributes as the rest of the + // string so that the row looks consistent. This requires us to return `colLimit` instead of `colLimit - 1`. + // Additionally, this has the benefit that callers can detect line wrapping by checking `columnEnd >= columnLimit`. + state.columnEnd = state.text.empty() ? h.colEnd : h.colLimit; + state.columnBeginDirty = h.colBegDirty; + state.columnEndDirty = h.colEndDirty; +} +catch (...) +{ + Reset(TextAttribute{}); + throw; +} + +[[msvc::forceinline]] void ROW::WriteHelper::ReplaceText() noexcept +{ + size_t ch = chBeg; + + for (const auto& s : til::utf16_iterator{ chars }) + { + const auto wide = til::at(s, 0) < 0x80 ? false : IsGlyphFullWidth(s); + const auto colEndNew = gsl::narrow_cast(colEnd + 1u + wide); + if (colEndNew > colLimit) + { + colEndDirty = colLimit; + break; + } + + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch); + if (wide) + { + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch | CharOffsetsTrailer); + } + + colEndDirty = colEnd; + ch += s.size(); + } + + charsConsumed = ch - chBeg; +} + +til::CoordType ROW::CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit) +try +{ + const auto otherColBeg = other._clampedColumnInclusive(otherBegin); + const auto otherColLimit = other._clampedColumnInclusive(otherLimit); + std::span charOffsets; + std::wstring_view chars; + + if (otherColBeg < otherColLimit) + { + charOffsets = other._charOffsets.subspan(otherColBeg, static_cast(otherColLimit) - otherColBeg + 1); + const auto charsOffset = charOffsets.front() & CharOffsetsMask; + // We _are_ using span. But C++ decided that string_view and span aren't convertible. + // _chars is a std::span for performance and because it refers to raw, shared memory. +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + chars = { other._chars.data() + charsOffset, other._chars.size() - charsOffset }; + } + + WriteHelper h{ *this, columnBegin, columnLimit, chars }; + if (!h.IsValid()) + { + return h.colBeg; + } + // Any valid charOffsets array is at least 2 elements long (the 1st element is the start offset and the 2nd + // element is the length of the first glyph) and begins/ends with a non-trailer offset. We don't really + // need to test for the end offset, since `WriteHelper::WriteWithOffsets` already takes care of that. + if (charOffsets.size() < 2 || WI_IsFlagSet(charOffsets.front(), CharOffsetsTrailer)) + { + assert(false); + otherBegin = other.size(); + return h.colBeg; + } + h.CopyRangeFrom(charOffsets); + h.Finish(); + + otherBegin += h.colEnd - h.colBeg; + return h.colEndDirty; +} +catch (...) +{ + Reset(TextAttribute{}); + throw; +} + +[[msvc::forceinline]] void ROW::WriteHelper::CopyRangeFrom(const std::span& charOffsets) noexcept +{ + // Since our `charOffsets` input is already in columns (just like the `ROW::_charOffsets`), + // we can directly look up the end char-offset, but... + const auto colEndDirtyInput = std::min(gsl::narrow_cast(colLimit - colBeg), gsl::narrow(charOffsets.size() - 1)); + + // ...since the colLimit might intersect with a wide glyph in `charOffset`, we need to adjust our input-colEnd. + auto colEndInput = colEndDirtyInput; + for (; WI_IsFlagSet(til::at(charOffsets, colEndInput), CharOffsetsTrailer); --colEndInput) + { + } + + const auto baseOffset = til::at(charOffsets, 0); + const auto endOffset = til::at(charOffsets, colEndInput); + const auto inToOutOffset = gsl::narrow_cast(chBeg - baseOffset); + + // Now with the `colEndInput` figured out, we can easily copy the `charOffsets` into the `_charOffsets`. + // It's possible to use SIMD for this loop for extra perf gains. Something like this for SSE2 (~8x faster): + // const auto in = _mm_loadu_si128(...); + // const auto off = _mm_and_epi32(in, _mm_set1_epi16(CharOffsetsMask)); + // const auto trailer = _mm_and_epi32(in, _mm_set1_epi16(CharOffsetsTrailer)); + // const auto out = _mm_or_epi32(_mm_add_epi16(off, _mm_set1_epi16(inToOutOffset)), trailer); + // _mm_store_si128(..., out); + for (uint16_t i = 0; i < colEndInput; ++i, ++colEnd) + { + const auto ch = til::at(charOffsets, i); + const auto off = ch & CharOffsetsMask; + const auto trailer = ch & CharOffsetsTrailer; + til::at(row._charOffsets, colEnd) = gsl::narrow_cast((off + inToOutOffset) | trailer); + } + + colEndDirty = gsl::narrow_cast(colBeg + colEndDirtyInput); + charsConsumed = endOffset - baseOffset; +} + +[[msvc::forceinline]] void ROW::WriteHelper::Finish() +{ + colEndDirty = row._adjustForward(colEndDirty); + + const uint16_t trailingSpaces = colEndDirty - colEnd; + const auto chEndDirtyOld = row._uncheckedCharOffset(colEndDirty); + const auto chEndDirty = chBegDirty + charsConsumed + leadingSpaces + trailingSpaces; + + if (chEndDirty != chEndDirtyOld) + { + row._resizeChars(colEndDirty, chBegDirty, chEndDirty, chEndDirtyOld); + } + + { + // std::copy_n compiles to memmove. We can do better. It also gets rid of an extra branch, + // because std::copy_n avoids calling memmove if the count is 0. It's never 0 for us. + const auto itBeg = row._chars.begin() + chBeg; + memcpy(&*itBeg, chars.data(), charsConsumed * sizeof(wchar_t)); + + if (leadingSpaces) + { + fill_n_small(row._chars.begin() + chBegDirty, leadingSpaces, L' '); + iota_n(row._charOffsets.begin() + colBegDirty, leadingSpaces, chBegDirty); + } + if (trailingSpaces) + { + fill_n_small(itBeg + charsConsumed, trailingSpaces, L' '); + iota_n(row._charOffsets.begin() + colEnd, trailingSpaces, gsl::narrow_cast(chBeg + charsConsumed)); + } + } + + // This updates `_doubleBytePadded` whenever we write the last column in the row. `_doubleBytePadded` tells our text + // reflow algorithm whether it should ignore the last column. This is important when writing wide characters into + // the terminal: If the last wide character in a row only fits partially, we should render whitespace, but + // during text reflow pretend as if no whitespace exists. After all, the user didn't write any whitespace there. // - // Task: - // Replace the characters in cells [colBeg, colEnd) with a single `width`-wide glyph consisting of `chars`. - // - // Problem: - // Imagine that we have the following ROW contents: - // "xxyyzz" - // xx, yy, zz are 2 cell wide glyphs. We want to insert a 2 cell wide glyph ww at colBeg 1: - // ^^ - // ww - // An incorrect result would be: - // "xwwyzz" - // The half cut off x and y glyph wouldn't make much sense, so we need to fill them with whitespace: - // " ww zz" - // - // Solution: - // Given the range we want to replace [colBeg, colEnd), we "extend" it to encompass leading (preceding) - // and trailing wide glyphs we partially overwrite resulting in the range [colExtBeg, colExtEnd), where - // colExtBeg <= colBeg and colExtEnd >= colEnd. In other words, the to be replaced range has been "extended". - // The amount of leading whitespace we need to insert is thus colBeg - colExtBeg - // and the amount of trailing whitespace colExtEnd - colEnd. - - // Extend range downwards (leading whitespace) - uint16_t colExtBeg = colBeg; - // Safety: colExtBeg is [0, _columnCount], because colBeg is. - const uint16_t chExtBeg = _uncheckedCharOffset(colExtBeg); - // Safety: colExtBeg remains [0, _columnCount] due to colExtBeg != 0. - for (; colExtBeg != 0 && _uncheckedIsTrailer(colExtBeg); --colExtBeg) + // The way this is written, it'll set `_doubleBytePadded` to `true` no matter whether a wide character didn't fit, + // or if the last 2 columns contain a wide character and a narrow character got written into the left half of it. + // In both cases `trailingSpaces` is 1 and fills the last column and `_doubleBytePadded` will be `true`. + if (colEndDirty == row._columnCount) { - } - - // Extend range upwards (trailing whitespace) - uint16_t colExtEnd = colEnd; - // Safety: colExtEnd cannot be incremented past _columnCount, because the last - // _charOffset at index _columnCount will never get the CharOffsetsTrailer flag. - for (; _uncheckedIsTrailer(colExtEnd); ++colExtEnd) - { - } - // Safety: After the previous loop colExtEnd is [0, _columnCount]. - const uint16_t chExtEnd = _uncheckedCharOffset(colExtEnd); - - const uint16_t leadingSpaces = colBeg - colExtBeg; - const uint16_t trailingSpaces = colExtEnd - colEnd; - const size_t chExtEndNew = chars.size() + leadingSpaces + trailingSpaces + chExtBeg; - - if (chExtEndNew != chExtEnd) - { - _resizeChars(colExtEnd, chExtBeg, chExtEnd, chExtEndNew); - } - - // Add leading/trailing whitespace and copy chars - { - auto it = _chars.begin() + chExtBeg; - it = fill_n_small(it, leadingSpaces, L' '); - it = copy_n_small(chars.begin(), chars.size(), it); - it = fill_n_small(it, trailingSpaces, L' '); - } - // Update char offsets with leading/trailing whitespace and the chars columns. - { - auto chPos = chExtBeg; - auto it = _charOffsets.begin() + colExtBeg; - - it = iota_n_mut(it, leadingSpaces, chPos); - - *it++ = chPos; - it = fill_small(it, _charOffsets.begin() + colEnd, gsl::narrow_cast(chPos | CharOffsetsTrailer)); - chPos = gsl::narrow_cast(chPos + chars.size()); - - it = iota_n_mut(it, trailingSpaces, chPos); + row.SetDoubleBytePadded(colEnd < row._columnCount); } } @@ -466,15 +658,15 @@ void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, co // as it reallocates the backing buffer and shifts the char offsets. // The parameters are difficult to explain, but their names are identical to // local variables in ReplaceCharacters() which I've attempted to document there. -void ROW::_resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew) +void ROW::_resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld) { - const auto diff = chExtEndNew - chExtEnd; + const auto diff = chEndDirty - chEndDirtyOld; const auto currentLength = _charSize(); const auto newLength = currentLength + diff; if (newLength <= _chars.size()) { - std::copy_n(_chars.begin() + chExtEnd, currentLength - chExtEnd, _chars.begin() + chExtEndNew); + std::copy_n(_chars.begin() + chEndDirtyOld, currentLength - chEndDirtyOld, _chars.begin() + chEndDirty); } else { @@ -484,14 +676,14 @@ void ROW::_resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, auto charsHeap = std::make_unique_for_overwrite(newCapacity); const std::span chars{ charsHeap.get(), newCapacity }; - std::copy_n(_chars.begin(), chExtBeg, chars.begin()); - std::copy_n(_chars.begin() + chExtEnd, currentLength - chExtEnd, chars.begin() + chExtEndNew); + std::copy_n(_chars.begin(), chBegDirty, chars.begin()); + std::copy_n(_chars.begin() + chEndDirtyOld, currentLength - chEndDirtyOld, chars.begin() + chEndDirty); _charsHeap = std::move(charsHeap); _chars = chars; } - auto it = _charOffsets.begin() + colExtEnd; + auto it = _charOffsets.begin() + colEndDirty; const auto end = _charOffsets.end(); for (; it != end; ++it) { @@ -499,6 +691,11 @@ void ROW::_resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, } } +til::small_rle& ROW::Attributes() noexcept +{ + return _attr; +} + const til::small_rle& ROW::Attributes() const noexcept { return _attr; @@ -527,6 +724,12 @@ uint16_t ROW::size() const noexcept return _columnCount; } +til::CoordType ROW::LineRenditionColumns() const noexcept +{ + const auto scale = _lineRendition != LineRendition::SingleWidth ? 1 : 0; + return _columnCount >> scale; +} + til::CoordType ROW::MeasureLeft() const noexcept { const auto text = GetText(); @@ -681,11 +884,13 @@ uint16_t ROW::_charSize() const noexcept // Safety: col must be [0, _columnCount]. uint16_t ROW::_uncheckedCharOffset(size_t col) const noexcept { + assert(col < _charOffsets.size()); return til::at(_charOffsets, col) & CharOffsetsMask; } // Safety: col must be [0, _columnCount]. bool ROW::_uncheckedIsTrailer(size_t col) const noexcept { + assert(col < _charOffsets.size()); return WI_IsFlagSet(til::at(_charOffsets, col), CharOffsetsTrailer); } diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index 7393bf6709..1e14c382fc 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -20,8 +20,6 @@ Revision History: #pragma once -#include - #include #include "LineRendition.hpp" @@ -37,6 +35,28 @@ enum class DelimiterClass RegularChar }; +struct RowWriteState +{ + // The text you want to write into the given ROW. When ReplaceText() returns, + // this is updated to remove all text from the beginning that was successfully written. + std::wstring_view text; // IN/OUT + // The column at which to start writing. + til::CoordType columnBegin = 0; // IN + // The first column which should not be written to anymore. + til::CoordType columnLimit = 0; // IN + + // The column 1 past the last glyph that was successfully written into the row. If you need to call + // ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want. + // If you want to continue writing where you left off, this is also the next columnBegin parameter. + til::CoordType columnEnd = 0; // OUT + // The first column that got modified by this write operation. In case that the first glyph we write overwrites + // the trailing half of a wide glyph, leadingSpaces will be 1 and this value will be 1 less than colBeg. + til::CoordType columnBeginDirty = 0; // OUT + // This is 1 past the last column that was modified and will be 1 past columnEnd if we overwrote + // the leading half of a wide glyph and had to fill the trailing half with whitespace. + til::CoordType columnEndDirty = 0; // OUT +}; + class ROW final { public: @@ -62,16 +82,23 @@ public: void Resize(wchar_t* charsBuffer, uint16_t* charOffsetsBuffer, uint16_t rowWidth, const TextAttribute& fillAttribute); void TransferAttributes(const til::small_rle& attr, til::CoordType newWidth); + til::CoordType NavigateToPrevious(til::CoordType column) const noexcept; + til::CoordType NavigateToNext(til::CoordType column) const noexcept; + void ClearCell(til::CoordType column); OutputCellIterator WriteCells(OutputCellIterator it, til::CoordType columnBegin, std::optional wrap = std::nullopt, std::optional limitRight = std::nullopt); bool SetAttrToEnd(til::CoordType columnBegin, TextAttribute attr); void ReplaceAttributes(til::CoordType beginIndex, til::CoordType endIndex, const TextAttribute& newAttr); void ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars); + void ReplaceText(RowWriteState& state); + til::CoordType CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit); + til::small_rle& Attributes() noexcept; const til::small_rle& Attributes() const noexcept; TextAttribute GetAttrByColumn(til::CoordType column) const; std::vector GetHyperlinks() const; uint16_t size() const noexcept; + til::CoordType LineRenditionColumns() const noexcept; til::CoordType MeasureLeft() const noexcept; til::CoordType MeasureRight() const noexcept; bool ContainsText() const noexcept; @@ -89,6 +116,50 @@ public: #endif private: + // WriteHelper exists because other forms of abstracting this functionality away (like templates with lambdas) + // where only very poorly optimized by MSVC as it failed to inline the templates. + struct WriteHelper + { + explicit WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept; + bool IsValid() const noexcept; + void ReplaceCharacters(til::CoordType width) noexcept; + void ReplaceText() noexcept; + void CopyRangeFrom(const std::span& charOffsets) noexcept; + void Finish(); + + // Parent pointer. + ROW& row; + // The text given by the caller. + const std::wstring_view& chars; + + // This is the same as the columnBegin parameter for ReplaceText(), etc., + // but clamped to a valid range via _clampedColumnInclusive. + uint16_t colBeg; + // This is the same as the columnLimit parameter for ReplaceText(), etc., + // but clamped to a valid range via _clampedColumnInclusive. + uint16_t colLimit; + + // The column 1 past the last glyph that was successfully written into the row. If you need to call + // ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want. + // If you want to continue writing where you left off, this is also the next columnBegin parameter. + uint16_t colEnd; + // The first column that got modified by this write operation. In case that the first glyph we write overwrites + // the trailing half of a wide glyph, leadingSpaces will be 1 and this value will be 1 less than colBeg. + uint16_t colBegDirty; + // Similar to dirtyBeg, this is 1 past the last column that was modified and will be 1 past colEnd if + // we overwrote the leading half of a wide glyph and had to fill the trailing half with whitespace. + uint16_t colEndDirty; + // The offset in ROW::chars at which we start writing the contents of WriteHelper::chars. + uint16_t chBeg; + // The offset at which we start writing leadingSpaces-many whitespaces. + uint16_t chBegDirty; + // The same as `colBeg - colBegDirty`. This is the amount of whitespace + // we write at chBegDirty, before the actual WriteHelper::chars content. + uint16_t leadingSpaces; + // The amount of characters copied from WriteHelper::chars. + size_t charsConsumed; + }; + // To simplify the detection of wide glyphs, we don't just store the simple character offset as described // for _charOffsets. Instead we use the most significant bit to indicate whether any column is the // trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer. @@ -102,13 +173,16 @@ private: template constexpr uint16_t _clampedColumnInclusive(T v) const noexcept; + uint16_t _adjustBackward(uint16_t column) const noexcept; + uint16_t _adjustForward(uint16_t column) const noexcept; + wchar_t _uncheckedChar(size_t off) const noexcept; uint16_t _charSize() const noexcept; uint16_t _uncheckedCharOffset(size_t col) const noexcept; bool _uncheckedIsTrailer(size_t col) const noexcept; void _init() noexcept; - void _resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew); + void _resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld); // These fields are a bit "wasteful", but it makes all this a bit more robust against // programming errors during initial development (which is when this comment was written). diff --git a/src/buffer/out/precomp.h b/src/buffer/out/precomp.h index 3ae7439f78..2ecc2c00d1 100644 --- a/src/buffer/out/precomp.h +++ b/src/buffer/out/precomp.h @@ -1,41 +1,7 @@ -/*++ -Copyright (c) Microsoft Corporation -Licensed under the MIT license. - -Module Name: -- precomp.h - -Abstract: -- Contains external headers to include in the precompile phase of console build process. -- Avoid including internal project headers. Instead include them only in the classes that need them (helps with test project building). ---*/ - -// stdafx.h : include file for standard system include files, -// or project specific include files that are used frequently, but -// are changed infrequently -// - +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. #pragma once -// clang-format off +#include -// This includes support libraries from the CRT, STL, WIL, and GSL -#include "LibraryIncludes.h" - -#pragma warning(push) -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers -#define NOMCX -#define NOHELP -#define NOCOMM -#endif - -// Windows Header Files: -#include -#include - -// private dependencies -#include "../inc/unicode.hpp" -#pragma warning(pop) - -// clang-format on +#include diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 2bbc2e2072..8b8f9d1f79 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -376,6 +376,32 @@ bool TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute return fSuccess; } +void TextBuffer::ConsumeGrapheme(std::wstring_view& chars) noexcept +{ + // This function is supposed to mirror the behavior of ROW::Write, when it reads characters off of `chars`. + // (I know that a UTF-16 code point is not a grapheme, but that's what we're working towards.) + chars = til::utf16_pop(chars); +} + +// This function is intended for writing regular "lines" of text and only the `state.text` and`state.columnBegin` +// fields are being used, whereas `state.columnLimit` is automatically overwritten by the line width of the given row. +// This allows this function to automatically set the wrap-forced field of the row, which is also the return value. +// The return value indicates to the caller whether the cursor should be moved to the next line. +void TextBuffer::WriteLine(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state) +{ + auto& r = GetRowByOffset(row); + + r.ReplaceText(state); + r.ReplaceAttributes(state.columnBegin, state.columnEnd, attributes); + + if (state.columnEnd >= state.columnLimit) + { + r.SetWrapForced(wrapAtEOL); + } + + TriggerRedraw(Viewport::FromExclusive({ state.columnBeginDirty, row, state.columnEndDirty, row + 1 })); +} + // Routine Description: // - Writes cells to the output buffer. Writes at the cursor. // Arguments: diff --git a/src/buffer/out/textBuffer.hpp b/src/buffer/out/textBuffer.hpp index c7e57da058..fe27d4ef00 100644 --- a/src/buffer/out/textBuffer.hpp +++ b/src/buffer/out/textBuffer.hpp @@ -89,6 +89,9 @@ public: TextBufferTextIterator GetTextDataAt(const til::point at, const Microsoft::Console::Types::Viewport limit) const; // Text insertion functions + static void ConsumeGrapheme(std::wstring_view& chars) noexcept; + void WriteLine(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state); + OutputCellIterator Write(const OutputCellIterator givenIt); OutputCellIterator Write(const OutputCellIterator givenIt, diff --git a/src/cascadia/LocalTests_SettingsModel/pch.h b/src/cascadia/LocalTests_SettingsModel/pch.h index b2e15b5a7b..9672725248 100644 --- a/src/cascadia/LocalTests_SettingsModel/pch.h +++ b/src/cascadia/LocalTests_SettingsModel/pch.h @@ -18,7 +18,7 @@ Author(s): // Manually include til after we include Windows.Foundation to give it winrt superpowers #define BLOCK_TIL // This includes support libraries from the CRT, STL, WIL, and GSL -#include "LibraryIncludes.h" +#include // This is inexplicable, but for whatever reason, cppwinrt conflicts with the // SDK definition of this function, so the only fix is to undef it. // from WinBase.h @@ -28,8 +28,9 @@ Author(s): #endif #include -#include +#include #include +#include #include #include diff --git a/src/host/ut_host/ScreenBufferTests.cpp b/src/host/ut_host/ScreenBufferTests.cpp index 668a82fcc6..bf3943d9e7 100644 --- a/src/host/ut_host/ScreenBufferTests.cpp +++ b/src/host/ut_host/ScreenBufferTests.cpp @@ -5249,6 +5249,16 @@ void ScreenBufferTests::SetAutoWrapMode() // Content should be clamped to the line width, overwriting the last char. VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abf", attributes)); VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition()); + // Writing a wide glyph into the last 2 columns and overwriting it with a narrow one. + cursor.SetPosition({ 80 - 3, startLine }); + stateMachine.ProcessString(L"a\U0001F604b"); + VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"a b", attributes)); + VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition()); + // Writing a wide glyph into the last column and overwriting it with a narrow one. + cursor.SetPosition({ 80 - 3, startLine }); + stateMachine.ProcessString(L"ab\U0001F604c"); + VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abc", attributes)); + VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition()); Log::Comment(L"When DECAWM is set, output is wrapped again."); stateMachine.ProcessString(L"\x1b[?7h"); diff --git a/src/host/ut_host/TextBufferTests.cpp b/src/host/ut_host/TextBufferTests.cpp index c10022df3f..823736ea30 100644 --- a/src/host/ut_host/TextBufferTests.cpp +++ b/src/host/ut_host/TextBufferTests.cpp @@ -147,6 +147,7 @@ class TextBufferTests TEST_METHOD(TestBurrito); TEST_METHOD(TestOverwriteChars); + TEST_METHOD(TestRowReplaceText); TEST_METHOD(TestAppendRTFText); @@ -2046,6 +2047,87 @@ void TextBufferTests::TestOverwriteChars() #undef complex1 } +void TextBufferTests::TestRowReplaceText() +{ + static constexpr til::size bufferSize{ 10, 3 }; + static constexpr UINT cursorSize = 12; + const TextAttribute attr{ 0x7f }; + TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer }; + auto& row = buffer.GetRowByOffset(0); + +#define complex L"\U0001F41B" + + struct Test + { + const wchar_t* description; + struct + { + std::wstring_view text; + til::CoordType columnBegin = 0; + til::CoordType columnLimit = 0; + } input; + struct + { + std::wstring_view text; + til::CoordType columnEnd = 0; + til::CoordType columnBeginDirty = 0; + til::CoordType columnEndDirty = 0; + } expected; + std::wstring_view expectedRow; + }; + + static constexpr std::array tests{ + Test{ + L"Not enough space -> early exit", + { complex, 2, 2 }, + { complex, 2, 2, 2 }, + L" ", + }, + Test{ + L"Exact right amount of space", + { complex, 2, 4 }, + { L"", 4, 2, 4 }, + L" " complex L" ", + }, + Test{ + L"Not enough space -> columnEnd = columnLimit", + { complex complex, 0, 3 }, + { complex, 3, 0, 4 }, + complex L" ", + }, + Test{ + L"Too much to fit into the row", + { complex L"b" complex L"c" complex L"abcd", 0, til::CoordTypeMax }, + { L"cd", 10, 0, 10 }, + complex L"b" complex L"c" complex L"ab", + }, + Test{ + L"Overwriting wide glyphs dirties both cells, but leaves columnEnd at the end of the text", + { L"efg", 1, til::CoordTypeMax }, + { L"", 4, 0, 5 }, + L" efg c" complex L"ab", + }, + }; + + for (const auto& t : tests) + { + Log::Comment(t.description); + RowWriteState actual{ + .text = t.input.text, + .columnBegin = t.input.columnBegin, + .columnLimit = t.input.columnLimit, + }; + row.ReplaceText(actual); + VERIFY_ARE_EQUAL(t.expected.text, actual.text); + VERIFY_ARE_EQUAL(t.expected.columnEnd, actual.columnEnd); + VERIFY_ARE_EQUAL(t.expected.columnBeginDirty, actual.columnBeginDirty); + VERIFY_ARE_EQUAL(t.expected.columnEndDirty, actual.columnEndDirty); + VERIFY_ARE_EQUAL(t.expectedRow, row.GetText()); + } + +#undef complex +} + void TextBufferTests::TestAppendRTFText() { { diff --git a/src/inc/LibraryIncludes.h b/src/inc/LibraryIncludes.h index 515f0956ce..7a7ec3a6a5 100644 --- a/src/inc/LibraryIncludes.h +++ b/src/inc/LibraryIncludes.h @@ -17,6 +17,8 @@ // Block minwindef.h min/max macros to prevent conflict #define NOMINMAX +// Exclude rarely-used stuff from Windows headers +#define WIN32_LEAN_AND_MEAN #include #include @@ -52,21 +54,19 @@ #include // WIL -#include -#include -#include -#include -#include -#include #include +#include #include -#include +// Due to the use of RESOURCE_SUPPRESS_STL in result.h, we need to include resource.h first, which happens +// implicitly through the includes above. If RESOURCE_SUPPRESS_STL is gone, the order doesn't matter anymore. +#include +#include // GSL // Block GSL Multi Span include because it both has C++17 deprecated iterators // and uses the C-namespaced "max" which conflicts with Windows definitions. -#define GSL_MULTI_SPAN_H -#include +#include +#include // CppCoreCheck #include diff --git a/src/inc/test/CommonState.hpp b/src/inc/test/CommonState.hpp index 0e613d828c..942d0c6007 100644 --- a/src/inc/test/CommonState.hpp +++ b/src/inc/test/CommonState.hpp @@ -256,33 +256,6 @@ private: std::unique_ptr m_backupTextBufferInfo; std::unique_ptr m_readHandle; - struct TestString - { - std::wstring_view string; - bool wide = false; - }; - - static void applyTestString(ROW* pRow, const auto& testStrings) - { - uint16_t x = 0; - for (const auto& t : testStrings) - { - if (t.wide) - { - pRow->ReplaceCharacters(x, 2, t.string); - x += 2; - } - else - { - for (const auto& ch : t.string) - { - pRow->ReplaceCharacters(x, 1, { &ch, 1 }); - x += 1; - } - } - } - } - void FillRow(ROW* pRow, bool wrapForced) { // fill a row @@ -290,15 +263,13 @@ private: // か = \x304b // き = \x304d - static constexpr std::array testStrings{ - TestString{ L"AB" }, - TestString{ L"\x304b", true }, - TestString{ L"C" }, - TestString{ L"\x304d", true }, - TestString{ L"DE " }, - }; - - applyTestString(pRow, testStrings); + uint16_t column = 0; + for (const auto& ch : std::wstring_view{ L"AB\u304bC\u304dDE " }) + { + const uint16_t width = ch >= 0x80 ? 2 : 1; + pRow->ReplaceCharacters(column, width, { &ch, 1 }); + column += width; + } // A = bright red on dark gray // This string starts at index 0 diff --git a/src/inc/til/point.h b/src/inc/til/point.h index f5e4d64a5d..332b6fd8e0 100644 --- a/src/inc/til/point.h +++ b/src/inc/til/point.h @@ -6,6 +6,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" { using CoordType = int32_t; + inline constexpr CoordType CoordTypeMin = INT32_MIN; + inline constexpr CoordType CoordTypeMax = INT32_MAX; namespace details { diff --git a/src/inc/til/rle.h b/src/inc/til/rle.h index 21f56f100d..fa9e379367 100644 --- a/src/inc/til/rle.h +++ b/src/inc/til/rle.h @@ -479,6 +479,15 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" _replace_unchecked(start_index, end_index, replacements); } + // Replace the range [start_index, end_index) with replacements. + // If end_index is larger than size() it's set to size(). + // start_index must be smaller or equal to end_index. + void replace(size_type start_index, size_type end_index, const basic_rle& replacements) + { + _check_indices(start_index, end_index); + _replace_unchecked(start_index, end_index, replacements._runs); + } + // Replaces every instance of old_value in this vector with new_value. void replace_values(const value_type& old_value, const value_type& new_value) { diff --git a/src/inc/til/unicode.h b/src/inc/til/unicode.h index 030b7053bf..5191e236c3 100644 --- a/src/inc/til/unicode.h +++ b/src/inc/til/unicode.h @@ -59,6 +59,30 @@ namespace til return { ptr, len }; } + // Removes the first code point off of `wstr` and returns the rest. + constexpr std::wstring_view utf16_pop(std::wstring_view wstr) noexcept + { + auto it = wstr.begin(); + const auto end = wstr.end(); + + if (it != end) + { + const auto wch = *it; + ++it; + + if (is_surrogate(wch)) + { + const auto wch2 = it != end ? *it : wchar_t{}; + if (is_leading_surrogate(wch) && is_trailing_surrogate(wch2)) + { + ++it; + } + } + } + + return { it, end }; + } + // Splits a UTF16 string into codepoints, yielding `wstring_view`s of UTF16 text. Use it as: // for (const auto& str : til::utf16_iterator{ input }) { ... } struct utf16_iterator diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index ec46da7afb..9dcc434bbd 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -80,11 +80,12 @@ void AdaptDispatch::_WriteToBuffer(const std::wstring_view string) // Turn off the cursor until we're done, so it isn't refreshed unnecessarily. cursor.SetIsOn(false); - // The width at which we wrap is determined by the line rendition attribute. - auto lineWidth = textBuffer.GetLineWidth(cursorPosition.y); + RowWriteState state{ + .text = string, + .columnLimit = textBuffer.GetLineWidth(cursorPosition.y), + }; - auto stringPosition = string.cbegin(); - while (stringPosition < string.cend()) + while (!state.text.empty()) { if (cursor.IsDelayedEOLWrap() && wrapAtEOL) { @@ -97,64 +98,59 @@ void AdaptDispatch::_WriteToBuffer(const std::wstring_view string) _api.LineFeed(true, true); cursorPosition = cursor.GetPosition(); // We need to recalculate the width when moving to a new line. - lineWidth = textBuffer.GetLineWidth(cursorPosition.y); + state.columnLimit = textBuffer.GetLineWidth(cursorPosition.y); } } - const OutputCellIterator it(std::wstring_view{ stringPosition, string.cend() }, attributes); if (_modes.test(Mode::InsertReplace)) { // If insert-replace mode is enabled, we first measure how many cells // the string will occupy, and scroll the target area right by that // amount to make space for the incoming text. + const OutputCellIterator it(state.text, attributes); auto measureIt = it; - while (measureIt && measureIt.GetCellDistance(it) < lineWidth) + while (measureIt && measureIt.GetCellDistance(it) < state.columnLimit) { - measureIt++; + ++measureIt; } const auto row = cursorPosition.y; const auto cellCount = measureIt.GetCellDistance(it); - _ScrollRectHorizontally(textBuffer, { cursorPosition.x, row, lineWidth, row + 1 }, cellCount); + _ScrollRectHorizontally(textBuffer, { cursorPosition.x, row, state.columnLimit, row + 1 }, cellCount); } - const auto itEnd = textBuffer.WriteLine(it, cursorPosition, wrapAtEOL, lineWidth - 1); - if (itEnd.GetInputDistance(it) == 0) + state.columnBegin = cursorPosition.x; + + const auto textPositionBefore = state.text.data(); + textBuffer.WriteLine(cursorPosition.y, wrapAtEOL, attributes, state); + const auto textPositionAfter = state.text.data(); + + if (state.columnBeginDirty != state.columnEndDirty) { - // If we haven't written anything out because there wasn't enough space, - // we move the cursor to the end of the line so that it's forced to wrap. - cursorPosition.x = lineWidth; - // But if wrapping is disabled, we also need to move to the next string - // position, otherwise we'll be stuck in this loop forever. - if (!wrapAtEOL) - { - stringPosition++; - } - } - else - { - const auto cellCount = itEnd.GetCellDistance(it); - const auto changedRect = til::rect{ cursorPosition, til::size{ cellCount, 1 } }; + const til::rect changedRect{ state.columnBeginDirty, cursorPosition.y, state.columnEndDirty, cursorPosition.y + 1 }; _api.NotifyAccessibilityChange(changedRect); - - stringPosition += itEnd.GetInputDistance(it); - cursorPosition.x += cellCount; } - if (cursorPosition.x >= lineWidth) + // If we're past the end of the line, we need to clamp the cursor + // back into range, and if wrapping is enabled, set the delayed wrap + // flag. The wrapping only occurs once another character is output. + const auto isWrapping = state.columnEnd >= state.columnLimit; + cursorPosition.x = isWrapping ? state.columnLimit - 1 : state.columnEnd; + cursor.SetPosition(cursorPosition); + + if (isWrapping) { - // If we're past the end of the line, we need to clamp the cursor - // back into range, and if wrapping is enabled, set the delayed wrap - // flag. The wrapping only occurs once another character is output. - cursorPosition.x = lineWidth - 1; - cursor.SetPosition(cursorPosition); if (wrapAtEOL) { cursor.DelayEOLWrap(); } - } - else - { - cursor.SetPosition(cursorPosition); + else if (textPositionBefore == textPositionAfter) + { + // We want to wrap, but we're not allowed to and we failed to write even a single character into the row. + // This can only mean one thing! The DECAWM Autowrap mode is disabled ("\x1b[?7l") and we tried writing a + // wide glyph into the last column. ROW::Write() returns the lineWidth and leaves stringIterator untouched. + // To prevent a deadlock, because stringIterator never advances, we need to throw that glyph away. + textBuffer.ConsumeGrapheme(state.text); + } } } diff --git a/src/til/ut_til/RunLengthEncodingTests.cpp b/src/til/ut_til/RunLengthEncodingTests.cpp index 340b48f16d..ce79cd754c 100644 --- a/src/til/ut_til/RunLengthEncodingTests.cpp +++ b/src/til/ut_til/RunLengthEncodingTests.cpp @@ -190,7 +190,7 @@ class RunLengthEncodingTests // We're testing replace() elsewhere, but this is special: // This ensures that even if we're default constructed we can add data. - rle.replace(0, 0, { 1, 5 }); + rle.replace(0, 0, rle_type{ 1, 5 }); VERIFY_ARE_EQUAL(5u, rle.size()); VERIFY_IS_FALSE(rle.empty()); } diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index 7569524087..bf772d2191 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -38,31 +38,12 @@ {{LT({Left}, {Top}) RB({Right}, {Bottom}) In:[{Right-Left+1} x {Bottom-Top+1}] Ex:[{Right-Left} x {Bottom-Top}]}} - - Stored Glyph, go to UnicodeStorage. - {_wch,X} Single - {_wch,X} Lead - {_wch,X} Trail - - - - - _data - - - - - {{ wrap={_wrapForced} padded={_doubleBytePadded} }} - - _data - - - - {{ id={_id} width={_rowWidth} }} + {_chars.data(),[_charOffsets[_columnCount]]} + _chars.data(),[_charOffsets[_columnCount]] - _charRow - _attrRow + _chars.data(),[_charOffsets[_columnCount]] + _charOffsets.data(),[_charOffsets.size()]