Add an efficient text stream write function (#14821)

This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.

While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.

This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
  They're based on functions that align the cursor to the start/end
  of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
  specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
  to bulk-copy already measured strings without re-measuring them.

Related to #8000

## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
  wide glyph reflow at the end of a row 
* This produces "a 咪" where only "a" has a white background:
  ```sh
  printf '\e7こん\e8\x1b[107ma\x1b[m\n'
  ```
* This produces "abん":
  ```sh
  stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
  ```
* This produces "xy" at the end of the line:
  ```sh
  stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
  ```
* This produces red whitespace followed by "こ " in the default
  background color at the end of the line, and "ん" on the next line:
  ```sh
  printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
  ```
This commit is contained in:
Leonard Hecker
2023-03-24 23:20:53 +01:00
committed by GitHub
parent f5e9e8ea77
commit f20cd3a9d3
18 changed files with 582 additions and 233 deletions

View File

@@ -18,6 +18,7 @@ BBBBBBBB
BBBBBCCC
BBBBCCCCC
BBGGRR
efg
EFG
EFGh
QQQQQQQQQQABCDEFGHIJ

View File

@@ -2280,8 +2280,6 @@ xunit
xutr
XVIRTUALSCREEN
XWalk
xwwyzz
xxyyzz
yact
YCast
YCENTER

View File

@@ -4,7 +4,10 @@
#include "precomp.h"
#include "Row.hpp"
#include <til/unicode.h>
#include "textBuffer.hpp"
#include "../../types/inc/GlyphWidth.hpp"
// The STL is missing a std::iota_n analogue for std::iota, so I made my own.
template<typename OutIt, typename Diff, typename T>
@@ -229,6 +232,40 @@ void ROW::TransferAttributes(const til::small_rle<TextAttribute, uint16_t, 1>& a
_attr.resize_trailing_extent(gsl::narrow<uint16_t>(newWidth));
}
// Returns the previous possible cursor position, preceding the given column.
// Returns 0 if column is less than or equal to 0.
til::CoordType ROW::NavigateToPrevious(til::CoordType column) const noexcept
{
return _adjustBackward(_clampedColumn(column - 1));
}
// Returns the next possible cursor position, following the given column.
// Returns the row width if column is beyond the width of the row.
til::CoordType ROW::NavigateToNext(til::CoordType column) const noexcept
{
return _adjustForward(_clampedColumn(column + 1));
}
uint16_t ROW::_adjustBackward(uint16_t column) const noexcept
{
// Safety: This is a little bit more dangerous. The first column is supposed
// to never be a trailer and so this loop should exit if column == 0.
for (; _uncheckedIsTrailer(column); --column)
{
}
return column;
}
uint16_t ROW::_adjustForward(uint16_t column) const noexcept
{
// Safety: This is a little bit more dangerous. The last column is supposed
// to never be a trailer and so this loop should exit if column == _columnCount.
for (; _uncheckedIsTrailer(column); ++column)
{
}
return column;
}
// Routine Description:
// - clears char data in column in row
// Arguments:
@@ -375,90 +412,245 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp
_attr.replace(_clampedColumnInclusive(beginIndex), _clampedColumnInclusive(endIndex), newAttr);
}
void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars)
[[msvc::forceinline]] ROW::WriteHelper::WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept :
row{ row },
chars{ chars }
{
const auto colBeg = _clampedUint16(columnBegin);
const auto colEnd = _clampedUint16(columnBegin + width);
colBeg = row._clampedColumnInclusive(columnBegin);
colLimit = row._clampedColumnInclusive(columnLimit);
chBegDirty = row._uncheckedCharOffset(colBeg);
colBegDirty = row._adjustBackward(colBeg);
leadingSpaces = colBeg - colBegDirty;
chBeg = chBegDirty + leadingSpaces;
colEnd = colBeg;
colEndDirty = 0;
charsConsumed = 0;
}
if (colBeg >= colEnd || colEnd > _columnCount || chars.empty())
[[msvc::forceinline]] bool ROW::WriteHelper::IsValid() const noexcept
{
return colBeg < colLimit && !chars.empty();
}
void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars)
try
{
WriteHelper h{ *this, columnBegin, _columnCount, chars };
if (!h.IsValid())
{
return;
}
h.ReplaceCharacters(width);
h.Finish();
}
catch (...)
{
// Due to this function writing _charOffsets first, then calling _resizeChars (which may throw) and only then finally
// filling in _chars, we might end up in a situation were _charOffsets contains offsets outside of the _chars array.
// --> Restore this row to a known "okay"-state.
Reset(TextAttribute{});
throw;
}
// Safety:
// * colBeg is now [0, _columnCount)
// * colEnd is now (colBeg, _columnCount]
[[msvc::forceinline]] void ROW::WriteHelper::ReplaceCharacters(til::CoordType width) noexcept
{
const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width);
if (colEndNew > colLimit)
{
colEndDirty = colLimit;
}
else
{
til::at(row._charOffsets, colEnd++) = chBeg;
for (; colEnd < colEndNew; ++colEnd)
{
til::at(row._charOffsets, colEnd) = gsl::narrow_cast<uint16_t>(chBeg | CharOffsetsTrailer);
}
// Algorithm explanation
colEndDirty = colEnd;
charsConsumed = chars.size();
}
}
void ROW::ReplaceText(RowWriteState& state)
try
{
WriteHelper h{ *this, state.columnBegin, state.columnLimit, state.text };
if (!h.IsValid())
{
state.columnEnd = h.colBeg;
state.columnBeginDirty = h.colBeg;
state.columnEndDirty = h.colBeg;
return;
}
h.ReplaceText();
h.Finish();
state.text = state.text.substr(h.charsConsumed);
// Here's why we set `state.columnEnd` to `colLimit` if there's remaining text:
// Callers should be able to use `state.columnEnd` as the next cursor position, as well as the parameter for a
// follow-up call to ReplaceAttributes(). But if we fail to insert a wide glyph into the last column of a row,
// that last cell (which now contains padding whitespace) should get the same attributes as the rest of the
// string so that the row looks consistent. This requires us to return `colLimit` instead of `colLimit - 1`.
// Additionally, this has the benefit that callers can detect line wrapping by checking `columnEnd >= columnLimit`.
state.columnEnd = state.text.empty() ? h.colEnd : h.colLimit;
state.columnBeginDirty = h.colBegDirty;
state.columnEndDirty = h.colEndDirty;
}
catch (...)
{
Reset(TextAttribute{});
throw;
}
[[msvc::forceinline]] void ROW::WriteHelper::ReplaceText() noexcept
{
size_t ch = chBeg;
for (const auto& s : til::utf16_iterator{ chars })
{
const auto wide = til::at(s, 0) < 0x80 ? false : IsGlyphFullWidth(s);
const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + 1u + wide);
if (colEndNew > colLimit)
{
colEndDirty = colLimit;
break;
}
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch);
if (wide)
{
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer);
}
colEndDirty = colEnd;
ch += s.size();
}
charsConsumed = ch - chBeg;
}
til::CoordType ROW::CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit)
try
{
const auto otherColBeg = other._clampedColumnInclusive(otherBegin);
const auto otherColLimit = other._clampedColumnInclusive(otherLimit);
std::span<uint16_t> charOffsets;
std::wstring_view chars;
if (otherColBeg < otherColLimit)
{
charOffsets = other._charOffsets.subspan(otherColBeg, static_cast<size_t>(otherColLimit) - otherColBeg + 1);
const auto charsOffset = charOffsets.front() & CharOffsetsMask;
// We _are_ using span. But C++ decided that string_view and span aren't convertible.
// _chars is a std::span for performance and because it refers to raw, shared memory.
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
chars = { other._chars.data() + charsOffset, other._chars.size() - charsOffset };
}
WriteHelper h{ *this, columnBegin, columnLimit, chars };
if (!h.IsValid())
{
return h.colBeg;
}
// Any valid charOffsets array is at least 2 elements long (the 1st element is the start offset and the 2nd
// element is the length of the first glyph) and begins/ends with a non-trailer offset. We don't really
// need to test for the end offset, since `WriteHelper::WriteWithOffsets` already takes care of that.
if (charOffsets.size() < 2 || WI_IsFlagSet(charOffsets.front(), CharOffsetsTrailer))
{
assert(false);
otherBegin = other.size();
return h.colBeg;
}
h.CopyRangeFrom(charOffsets);
h.Finish();
otherBegin += h.colEnd - h.colBeg;
return h.colEndDirty;
}
catch (...)
{
Reset(TextAttribute{});
throw;
}
[[msvc::forceinline]] void ROW::WriteHelper::CopyRangeFrom(const std::span<const uint16_t>& charOffsets) noexcept
{
// Since our `charOffsets` input is already in columns (just like the `ROW::_charOffsets`),
// we can directly look up the end char-offset, but...
const auto colEndDirtyInput = std::min(gsl::narrow_cast<uint16_t>(colLimit - colBeg), gsl::narrow<uint16_t>(charOffsets.size() - 1));
// ...since the colLimit might intersect with a wide glyph in `charOffset`, we need to adjust our input-colEnd.
auto colEndInput = colEndDirtyInput;
for (; WI_IsFlagSet(til::at(charOffsets, colEndInput), CharOffsetsTrailer); --colEndInput)
{
}
const auto baseOffset = til::at(charOffsets, 0);
const auto endOffset = til::at(charOffsets, colEndInput);
const auto inToOutOffset = gsl::narrow_cast<uint16_t>(chBeg - baseOffset);
// Now with the `colEndInput` figured out, we can easily copy the `charOffsets` into the `_charOffsets`.
// It's possible to use SIMD for this loop for extra perf gains. Something like this for SSE2 (~8x faster):
// const auto in = _mm_loadu_si128(...);
// const auto off = _mm_and_epi32(in, _mm_set1_epi16(CharOffsetsMask));
// const auto trailer = _mm_and_epi32(in, _mm_set1_epi16(CharOffsetsTrailer));
// const auto out = _mm_or_epi32(_mm_add_epi16(off, _mm_set1_epi16(inToOutOffset)), trailer);
// _mm_store_si128(..., out);
for (uint16_t i = 0; i < colEndInput; ++i, ++colEnd)
{
const auto ch = til::at(charOffsets, i);
const auto off = ch & CharOffsetsMask;
const auto trailer = ch & CharOffsetsTrailer;
til::at(row._charOffsets, colEnd) = gsl::narrow_cast<uint16_t>((off + inToOutOffset) | trailer);
}
colEndDirty = gsl::narrow_cast<uint16_t>(colBeg + colEndDirtyInput);
charsConsumed = endOffset - baseOffset;
}
[[msvc::forceinline]] void ROW::WriteHelper::Finish()
{
colEndDirty = row._adjustForward(colEndDirty);
const uint16_t trailingSpaces = colEndDirty - colEnd;
const auto chEndDirtyOld = row._uncheckedCharOffset(colEndDirty);
const auto chEndDirty = chBegDirty + charsConsumed + leadingSpaces + trailingSpaces;
if (chEndDirty != chEndDirtyOld)
{
row._resizeChars(colEndDirty, chBegDirty, chEndDirty, chEndDirtyOld);
}
{
// std::copy_n compiles to memmove. We can do better. It also gets rid of an extra branch,
// because std::copy_n avoids calling memmove if the count is 0. It's never 0 for us.
const auto itBeg = row._chars.begin() + chBeg;
memcpy(&*itBeg, chars.data(), charsConsumed * sizeof(wchar_t));
if (leadingSpaces)
{
fill_n_small(row._chars.begin() + chBegDirty, leadingSpaces, L' ');
iota_n(row._charOffsets.begin() + colBegDirty, leadingSpaces, chBegDirty);
}
if (trailingSpaces)
{
fill_n_small(itBeg + charsConsumed, trailingSpaces, L' ');
iota_n(row._charOffsets.begin() + colEnd, trailingSpaces, gsl::narrow_cast<uint16_t>(chBeg + charsConsumed));
}
}
// This updates `_doubleBytePadded` whenever we write the last column in the row. `_doubleBytePadded` tells our text
// reflow algorithm whether it should ignore the last column. This is important when writing wide characters into
// the terminal: If the last wide character in a row only fits partially, we should render whitespace, but
// during text reflow pretend as if no whitespace exists. After all, the user didn't write any whitespace there.
//
// Task:
// Replace the characters in cells [colBeg, colEnd) with a single `width`-wide glyph consisting of `chars`.
//
// Problem:
// Imagine that we have the following ROW contents:
// "xxyyzz"
// xx, yy, zz are 2 cell wide glyphs. We want to insert a 2 cell wide glyph ww at colBeg 1:
// ^^
// ww
// An incorrect result would be:
// "xwwyzz"
// The half cut off x and y glyph wouldn't make much sense, so we need to fill them with whitespace:
// " ww zz"
//
// Solution:
// Given the range we want to replace [colBeg, colEnd), we "extend" it to encompass leading (preceding)
// and trailing wide glyphs we partially overwrite resulting in the range [colExtBeg, colExtEnd), where
// colExtBeg <= colBeg and colExtEnd >= colEnd. In other words, the to be replaced range has been "extended".
// The amount of leading whitespace we need to insert is thus colBeg - colExtBeg
// and the amount of trailing whitespace colExtEnd - colEnd.
// Extend range downwards (leading whitespace)
uint16_t colExtBeg = colBeg;
// Safety: colExtBeg is [0, _columnCount], because colBeg is.
const uint16_t chExtBeg = _uncheckedCharOffset(colExtBeg);
// Safety: colExtBeg remains [0, _columnCount] due to colExtBeg != 0.
for (; colExtBeg != 0 && _uncheckedIsTrailer(colExtBeg); --colExtBeg)
// The way this is written, it'll set `_doubleBytePadded` to `true` no matter whether a wide character didn't fit,
// or if the last 2 columns contain a wide character and a narrow character got written into the left half of it.
// In both cases `trailingSpaces` is 1 and fills the last column and `_doubleBytePadded` will be `true`.
if (colEndDirty == row._columnCount)
{
}
// Extend range upwards (trailing whitespace)
uint16_t colExtEnd = colEnd;
// Safety: colExtEnd cannot be incremented past _columnCount, because the last
// _charOffset at index _columnCount will never get the CharOffsetsTrailer flag.
for (; _uncheckedIsTrailer(colExtEnd); ++colExtEnd)
{
}
// Safety: After the previous loop colExtEnd is [0, _columnCount].
const uint16_t chExtEnd = _uncheckedCharOffset(colExtEnd);
const uint16_t leadingSpaces = colBeg - colExtBeg;
const uint16_t trailingSpaces = colExtEnd - colEnd;
const size_t chExtEndNew = chars.size() + leadingSpaces + trailingSpaces + chExtBeg;
if (chExtEndNew != chExtEnd)
{
_resizeChars(colExtEnd, chExtBeg, chExtEnd, chExtEndNew);
}
// Add leading/trailing whitespace and copy chars
{
auto it = _chars.begin() + chExtBeg;
it = fill_n_small(it, leadingSpaces, L' ');
it = copy_n_small(chars.begin(), chars.size(), it);
it = fill_n_small(it, trailingSpaces, L' ');
}
// Update char offsets with leading/trailing whitespace and the chars columns.
{
auto chPos = chExtBeg;
auto it = _charOffsets.begin() + colExtBeg;
it = iota_n_mut(it, leadingSpaces, chPos);
*it++ = chPos;
it = fill_small(it, _charOffsets.begin() + colEnd, gsl::narrow_cast<uint16_t>(chPos | CharOffsetsTrailer));
chPos = gsl::narrow_cast<uint16_t>(chPos + chars.size());
it = iota_n_mut(it, trailingSpaces, chPos);
row.SetDoubleBytePadded(colEnd < row._columnCount);
}
}
@@ -466,15 +658,15 @@ void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, co
// as it reallocates the backing buffer and shifts the char offsets.
// The parameters are difficult to explain, but their names are identical to
// local variables in ReplaceCharacters() which I've attempted to document there.
void ROW::_resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew)
void ROW::_resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld)
{
const auto diff = chExtEndNew - chExtEnd;
const auto diff = chEndDirty - chEndDirtyOld;
const auto currentLength = _charSize();
const auto newLength = currentLength + diff;
if (newLength <= _chars.size())
{
std::copy_n(_chars.begin() + chExtEnd, currentLength - chExtEnd, _chars.begin() + chExtEndNew);
std::copy_n(_chars.begin() + chEndDirtyOld, currentLength - chEndDirtyOld, _chars.begin() + chEndDirty);
}
else
{
@@ -484,14 +676,14 @@ void ROW::_resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd,
auto charsHeap = std::make_unique_for_overwrite<wchar_t[]>(newCapacity);
const std::span chars{ charsHeap.get(), newCapacity };
std::copy_n(_chars.begin(), chExtBeg, chars.begin());
std::copy_n(_chars.begin() + chExtEnd, currentLength - chExtEnd, chars.begin() + chExtEndNew);
std::copy_n(_chars.begin(), chBegDirty, chars.begin());
std::copy_n(_chars.begin() + chEndDirtyOld, currentLength - chEndDirtyOld, chars.begin() + chEndDirty);
_charsHeap = std::move(charsHeap);
_chars = chars;
}
auto it = _charOffsets.begin() + colExtEnd;
auto it = _charOffsets.begin() + colEndDirty;
const auto end = _charOffsets.end();
for (; it != end; ++it)
{
@@ -499,6 +691,11 @@ void ROW::_resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd,
}
}
til::small_rle<TextAttribute, uint16_t, 1>& ROW::Attributes() noexcept
{
return _attr;
}
const til::small_rle<TextAttribute, uint16_t, 1>& ROW::Attributes() const noexcept
{
return _attr;
@@ -527,6 +724,12 @@ uint16_t ROW::size() const noexcept
return _columnCount;
}
til::CoordType ROW::LineRenditionColumns() const noexcept
{
const auto scale = _lineRendition != LineRendition::SingleWidth ? 1 : 0;
return _columnCount >> scale;
}
til::CoordType ROW::MeasureLeft() const noexcept
{
const auto text = GetText();
@@ -681,11 +884,13 @@ uint16_t ROW::_charSize() const noexcept
// Safety: col must be [0, _columnCount].
uint16_t ROW::_uncheckedCharOffset(size_t col) const noexcept
{
assert(col < _charOffsets.size());
return til::at(_charOffsets, col) & CharOffsetsMask;
}
// Safety: col must be [0, _columnCount].
bool ROW::_uncheckedIsTrailer(size_t col) const noexcept
{
assert(col < _charOffsets.size());
return WI_IsFlagSet(til::at(_charOffsets, col), CharOffsetsTrailer);
}

View File

@@ -20,8 +20,6 @@ Revision History:
#pragma once
#include <span>
#include <til/rle.h>
#include "LineRendition.hpp"
@@ -37,6 +35,28 @@ enum class DelimiterClass
RegularChar
};
struct RowWriteState
{
// The text you want to write into the given ROW. When ReplaceText() returns,
// this is updated to remove all text from the beginning that was successfully written.
std::wstring_view text; // IN/OUT
// The column at which to start writing.
til::CoordType columnBegin = 0; // IN
// The first column which should not be written to anymore.
til::CoordType columnLimit = 0; // IN
// The column 1 past the last glyph that was successfully written into the row. If you need to call
// ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want.
// If you want to continue writing where you left off, this is also the next columnBegin parameter.
til::CoordType columnEnd = 0; // OUT
// The first column that got modified by this write operation. In case that the first glyph we write overwrites
// the trailing half of a wide glyph, leadingSpaces will be 1 and this value will be 1 less than colBeg.
til::CoordType columnBeginDirty = 0; // OUT
// This is 1 past the last column that was modified and will be 1 past columnEnd if we overwrote
// the leading half of a wide glyph and had to fill the trailing half with whitespace.
til::CoordType columnEndDirty = 0; // OUT
};
class ROW final
{
public:
@@ -62,16 +82,23 @@ public:
void Resize(wchar_t* charsBuffer, uint16_t* charOffsetsBuffer, uint16_t rowWidth, const TextAttribute& fillAttribute);
void TransferAttributes(const til::small_rle<TextAttribute, uint16_t, 1>& attr, til::CoordType newWidth);
til::CoordType NavigateToPrevious(til::CoordType column) const noexcept;
til::CoordType NavigateToNext(til::CoordType column) const noexcept;
void ClearCell(til::CoordType column);
OutputCellIterator WriteCells(OutputCellIterator it, til::CoordType columnBegin, std::optional<bool> wrap = std::nullopt, std::optional<til::CoordType> limitRight = std::nullopt);
bool SetAttrToEnd(til::CoordType columnBegin, TextAttribute attr);
void ReplaceAttributes(til::CoordType beginIndex, til::CoordType endIndex, const TextAttribute& newAttr);
void ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars);
void ReplaceText(RowWriteState& state);
til::CoordType CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit);
til::small_rle<TextAttribute, uint16_t, 1>& Attributes() noexcept;
const til::small_rle<TextAttribute, uint16_t, 1>& Attributes() const noexcept;
TextAttribute GetAttrByColumn(til::CoordType column) const;
std::vector<uint16_t> GetHyperlinks() const;
uint16_t size() const noexcept;
til::CoordType LineRenditionColumns() const noexcept;
til::CoordType MeasureLeft() const noexcept;
til::CoordType MeasureRight() const noexcept;
bool ContainsText() const noexcept;
@@ -89,6 +116,50 @@ public:
#endif
private:
// WriteHelper exists because other forms of abstracting this functionality away (like templates with lambdas)
// where only very poorly optimized by MSVC as it failed to inline the templates.
struct WriteHelper
{
explicit WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept;
bool IsValid() const noexcept;
void ReplaceCharacters(til::CoordType width) noexcept;
void ReplaceText() noexcept;
void CopyRangeFrom(const std::span<const uint16_t>& charOffsets) noexcept;
void Finish();
// Parent pointer.
ROW& row;
// The text given by the caller.
const std::wstring_view& chars;
// This is the same as the columnBegin parameter for ReplaceText(), etc.,
// but clamped to a valid range via _clampedColumnInclusive.
uint16_t colBeg;
// This is the same as the columnLimit parameter for ReplaceText(), etc.,
// but clamped to a valid range via _clampedColumnInclusive.
uint16_t colLimit;
// The column 1 past the last glyph that was successfully written into the row. If you need to call
// ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want.
// If you want to continue writing where you left off, this is also the next columnBegin parameter.
uint16_t colEnd;
// The first column that got modified by this write operation. In case that the first glyph we write overwrites
// the trailing half of a wide glyph, leadingSpaces will be 1 and this value will be 1 less than colBeg.
uint16_t colBegDirty;
// Similar to dirtyBeg, this is 1 past the last column that was modified and will be 1 past colEnd if
// we overwrote the leading half of a wide glyph and had to fill the trailing half with whitespace.
uint16_t colEndDirty;
// The offset in ROW::chars at which we start writing the contents of WriteHelper::chars.
uint16_t chBeg;
// The offset at which we start writing leadingSpaces-many whitespaces.
uint16_t chBegDirty;
// The same as `colBeg - colBegDirty`. This is the amount of whitespace
// we write at chBegDirty, before the actual WriteHelper::chars content.
uint16_t leadingSpaces;
// The amount of characters copied from WriteHelper::chars.
size_t charsConsumed;
};
// To simplify the detection of wide glyphs, we don't just store the simple character offset as described
// for _charOffsets. Instead we use the most significant bit to indicate whether any column is the
// trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer.
@@ -102,13 +173,16 @@ private:
template<typename T>
constexpr uint16_t _clampedColumnInclusive(T v) const noexcept;
uint16_t _adjustBackward(uint16_t column) const noexcept;
uint16_t _adjustForward(uint16_t column) const noexcept;
wchar_t _uncheckedChar(size_t off) const noexcept;
uint16_t _charSize() const noexcept;
uint16_t _uncheckedCharOffset(size_t col) const noexcept;
bool _uncheckedIsTrailer(size_t col) const noexcept;
void _init() noexcept;
void _resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew);
void _resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld);
// These fields are a bit "wasteful", but it makes all this a bit more robust against
// programming errors during initial development (which is when this comment was written).

View File

@@ -1,41 +1,7 @@
/*++
Copyright (c) Microsoft Corporation
Licensed under the MIT license.
Module Name:
- precomp.h
Abstract:
- Contains external headers to include in the precompile phase of console build process.
- Avoid including internal project headers. Instead include them only in the classes that need them (helps with test project building).
--*/
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
// clang-format off
#include <LibraryIncludes.h>
// This includes support libraries from the CRT, STL, WIL, and GSL
#include "LibraryIncludes.h"
#pragma warning(push)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#define NOMCX
#define NOHELP
#define NOCOMM
#endif
// Windows Header Files:
#include <windows.h>
#include <intsafe.h>
// private dependencies
#include "../inc/unicode.hpp"
#pragma warning(pop)
// clang-format on
#include <unicode.hpp>

View File

@@ -376,6 +376,32 @@ bool TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute
return fSuccess;
}
void TextBuffer::ConsumeGrapheme(std::wstring_view& chars) noexcept
{
// This function is supposed to mirror the behavior of ROW::Write, when it reads characters off of `chars`.
// (I know that a UTF-16 code point is not a grapheme, but that's what we're working towards.)
chars = til::utf16_pop(chars);
}
// This function is intended for writing regular "lines" of text and only the `state.text` and`state.columnBegin`
// fields are being used, whereas `state.columnLimit` is automatically overwritten by the line width of the given row.
// This allows this function to automatically set the wrap-forced field of the row, which is also the return value.
// The return value indicates to the caller whether the cursor should be moved to the next line.
void TextBuffer::WriteLine(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state)
{
auto& r = GetRowByOffset(row);
r.ReplaceText(state);
r.ReplaceAttributes(state.columnBegin, state.columnEnd, attributes);
if (state.columnEnd >= state.columnLimit)
{
r.SetWrapForced(wrapAtEOL);
}
TriggerRedraw(Viewport::FromExclusive({ state.columnBeginDirty, row, state.columnEndDirty, row + 1 }));
}
// Routine Description:
// - Writes cells to the output buffer. Writes at the cursor.
// Arguments:

View File

@@ -89,6 +89,9 @@ public:
TextBufferTextIterator GetTextDataAt(const til::point at, const Microsoft::Console::Types::Viewport limit) const;
// Text insertion functions
static void ConsumeGrapheme(std::wstring_view& chars) noexcept;
void WriteLine(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state);
OutputCellIterator Write(const OutputCellIterator givenIt);
OutputCellIterator Write(const OutputCellIterator givenIt,

View File

@@ -18,7 +18,7 @@ Author(s):
// Manually include til after we include Windows.Foundation to give it winrt superpowers
#define BLOCK_TIL
// This includes support libraries from the CRT, STL, WIL, and GSL
#include "LibraryIncludes.h"
#include <LibraryIncludes.h>
// This is inexplicable, but for whatever reason, cppwinrt conflicts with the
// SDK definition of this function, so the only fix is to undef it.
// from WinBase.h
@@ -28,8 +28,9 @@ Author(s):
#endif
#include <wil/cppwinrt.h>
#include <unknwn.h>
#include <Unknwn.h>
#include <hstring.h>
#include <shellapi.h>
#include <WexTestClass.h>
#include <json.h>

View File

@@ -5249,6 +5249,16 @@ void ScreenBufferTests::SetAutoWrapMode()
// Content should be clamped to the line width, overwriting the last char.
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abf", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
// Writing a wide glyph into the last 2 columns and overwriting it with a narrow one.
cursor.SetPosition({ 80 - 3, startLine });
stateMachine.ProcessString(L"a\U0001F604b");
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"a b", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
// Writing a wide glyph into the last column and overwriting it with a narrow one.
cursor.SetPosition({ 80 - 3, startLine });
stateMachine.ProcessString(L"ab\U0001F604c");
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abc", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
Log::Comment(L"When DECAWM is set, output is wrapped again.");
stateMachine.ProcessString(L"\x1b[?7h");

View File

@@ -147,6 +147,7 @@ class TextBufferTests
TEST_METHOD(TestBurrito);
TEST_METHOD(TestOverwriteChars);
TEST_METHOD(TestRowReplaceText);
TEST_METHOD(TestAppendRTFText);
@@ -2046,6 +2047,87 @@ void TextBufferTests::TestOverwriteChars()
#undef complex1
}
void TextBufferTests::TestRowReplaceText()
{
static constexpr til::size bufferSize{ 10, 3 };
static constexpr UINT cursorSize = 12;
const TextAttribute attr{ 0x7f };
TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer };
auto& row = buffer.GetRowByOffset(0);
#define complex L"\U0001F41B"
struct Test
{
const wchar_t* description;
struct
{
std::wstring_view text;
til::CoordType columnBegin = 0;
til::CoordType columnLimit = 0;
} input;
struct
{
std::wstring_view text;
til::CoordType columnEnd = 0;
til::CoordType columnBeginDirty = 0;
til::CoordType columnEndDirty = 0;
} expected;
std::wstring_view expectedRow;
};
static constexpr std::array tests{
Test{
L"Not enough space -> early exit",
{ complex, 2, 2 },
{ complex, 2, 2, 2 },
L" ",
},
Test{
L"Exact right amount of space",
{ complex, 2, 4 },
{ L"", 4, 2, 4 },
L" " complex L" ",
},
Test{
L"Not enough space -> columnEnd = columnLimit",
{ complex complex, 0, 3 },
{ complex, 3, 0, 4 },
complex L" ",
},
Test{
L"Too much to fit into the row",
{ complex L"b" complex L"c" complex L"abcd", 0, til::CoordTypeMax },
{ L"cd", 10, 0, 10 },
complex L"b" complex L"c" complex L"ab",
},
Test{
L"Overwriting wide glyphs dirties both cells, but leaves columnEnd at the end of the text",
{ L"efg", 1, til::CoordTypeMax },
{ L"", 4, 0, 5 },
L" efg c" complex L"ab",
},
};
for (const auto& t : tests)
{
Log::Comment(t.description);
RowWriteState actual{
.text = t.input.text,
.columnBegin = t.input.columnBegin,
.columnLimit = t.input.columnLimit,
};
row.ReplaceText(actual);
VERIFY_ARE_EQUAL(t.expected.text, actual.text);
VERIFY_ARE_EQUAL(t.expected.columnEnd, actual.columnEnd);
VERIFY_ARE_EQUAL(t.expected.columnBeginDirty, actual.columnBeginDirty);
VERIFY_ARE_EQUAL(t.expected.columnEndDirty, actual.columnEndDirty);
VERIFY_ARE_EQUAL(t.expectedRow, row.GetText());
}
#undef complex
}
void TextBufferTests::TestAppendRTFText()
{
{

View File

@@ -17,6 +17,8 @@
// Block minwindef.h min/max macros to prevent <algorithm> conflict
#define NOMINMAX
// Exclude rarely-used stuff from Windows headers
#define WIN32_LEAN_AND_MEAN
#include <algorithm>
#include <atomic>
@@ -52,21 +54,19 @@
#include <vector>
// WIL
#include <wil/Common.h>
#include <wil/Result.h>
#include <wil/nt_result_macros.h>
#include <wil/resource.h>
#include <wil/wistd_memory.h>
#include <wil/stl.h>
#include <wil/com.h>
#include <wil/stl.h>
#include <wil/filesystem.h>
#include <wil/win32_helpers.h>
// Due to the use of RESOURCE_SUPPRESS_STL in result.h, we need to include resource.h first, which happens
// implicitly through the includes above. If RESOURCE_SUPPRESS_STL is gone, the order doesn't matter anymore.
#include <wil/result.h>
#include <wil/nt_result_macros.h>
// GSL
// Block GSL Multi Span include because it both has C++17 deprecated iterators
// and uses the C-namespaced "max" which conflicts with Windows definitions.
#define GSL_MULTI_SPAN_H
#include <gsl/gsl>
#include <gsl/gsl_util>
#include <gsl/pointers>
// CppCoreCheck
#include <CppCoreCheck/Warnings.h>

View File

@@ -256,33 +256,6 @@ private:
std::unique_ptr<TextBuffer> m_backupTextBufferInfo;
std::unique_ptr<INPUT_READ_HANDLE_DATA> m_readHandle;
struct TestString
{
std::wstring_view string;
bool wide = false;
};
static void applyTestString(ROW* pRow, const auto& testStrings)
{
uint16_t x = 0;
for (const auto& t : testStrings)
{
if (t.wide)
{
pRow->ReplaceCharacters(x, 2, t.string);
x += 2;
}
else
{
for (const auto& ch : t.string)
{
pRow->ReplaceCharacters(x, 1, { &ch, 1 });
x += 1;
}
}
}
}
void FillRow(ROW* pRow, bool wrapForced)
{
// fill a row
@@ -290,15 +263,13 @@ private:
// か = \x304b
// き = \x304d
static constexpr std::array testStrings{
TestString{ L"AB" },
TestString{ L"\x304b", true },
TestString{ L"C" },
TestString{ L"\x304d", true },
TestString{ L"DE " },
};
applyTestString(pRow, testStrings);
uint16_t column = 0;
for (const auto& ch : std::wstring_view{ L"AB\u304bC\u304dDE " })
{
const uint16_t width = ch >= 0x80 ? 2 : 1;
pRow->ReplaceCharacters(column, width, { &ch, 1 });
column += width;
}
// A = bright red on dark gray
// This string starts at index 0

View File

@@ -6,6 +6,8 @@
namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
using CoordType = int32_t;
inline constexpr CoordType CoordTypeMin = INT32_MIN;
inline constexpr CoordType CoordTypeMax = INT32_MAX;
namespace details
{

View File

@@ -479,6 +479,15 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
_replace_unchecked(start_index, end_index, replacements);
}
// Replace the range [start_index, end_index) with replacements.
// If end_index is larger than size() it's set to size().
// start_index must be smaller or equal to end_index.
void replace(size_type start_index, size_type end_index, const basic_rle& replacements)
{
_check_indices(start_index, end_index);
_replace_unchecked(start_index, end_index, replacements._runs);
}
// Replaces every instance of old_value in this vector with new_value.
void replace_values(const value_type& old_value, const value_type& new_value)
{

View File

@@ -59,6 +59,30 @@ namespace til
return { ptr, len };
}
// Removes the first code point off of `wstr` and returns the rest.
constexpr std::wstring_view utf16_pop(std::wstring_view wstr) noexcept
{
auto it = wstr.begin();
const auto end = wstr.end();
if (it != end)
{
const auto wch = *it;
++it;
if (is_surrogate(wch))
{
const auto wch2 = it != end ? *it : wchar_t{};
if (is_leading_surrogate(wch) && is_trailing_surrogate(wch2))
{
++it;
}
}
}
return { it, end };
}
// Splits a UTF16 string into codepoints, yielding `wstring_view`s of UTF16 text. Use it as:
// for (const auto& str : til::utf16_iterator{ input }) { ... }
struct utf16_iterator

View File

@@ -80,11 +80,12 @@ void AdaptDispatch::_WriteToBuffer(const std::wstring_view string)
// Turn off the cursor until we're done, so it isn't refreshed unnecessarily.
cursor.SetIsOn(false);
// The width at which we wrap is determined by the line rendition attribute.
auto lineWidth = textBuffer.GetLineWidth(cursorPosition.y);
RowWriteState state{
.text = string,
.columnLimit = textBuffer.GetLineWidth(cursorPosition.y),
};
auto stringPosition = string.cbegin();
while (stringPosition < string.cend())
while (!state.text.empty())
{
if (cursor.IsDelayedEOLWrap() && wrapAtEOL)
{
@@ -97,64 +98,59 @@ void AdaptDispatch::_WriteToBuffer(const std::wstring_view string)
_api.LineFeed(true, true);
cursorPosition = cursor.GetPosition();
// We need to recalculate the width when moving to a new line.
lineWidth = textBuffer.GetLineWidth(cursorPosition.y);
state.columnLimit = textBuffer.GetLineWidth(cursorPosition.y);
}
}
const OutputCellIterator it(std::wstring_view{ stringPosition, string.cend() }, attributes);
if (_modes.test(Mode::InsertReplace))
{
// If insert-replace mode is enabled, we first measure how many cells
// the string will occupy, and scroll the target area right by that
// amount to make space for the incoming text.
const OutputCellIterator it(state.text, attributes);
auto measureIt = it;
while (measureIt && measureIt.GetCellDistance(it) < lineWidth)
while (measureIt && measureIt.GetCellDistance(it) < state.columnLimit)
{
measureIt++;
++measureIt;
}
const auto row = cursorPosition.y;
const auto cellCount = measureIt.GetCellDistance(it);
_ScrollRectHorizontally(textBuffer, { cursorPosition.x, row, lineWidth, row + 1 }, cellCount);
_ScrollRectHorizontally(textBuffer, { cursorPosition.x, row, state.columnLimit, row + 1 }, cellCount);
}
const auto itEnd = textBuffer.WriteLine(it, cursorPosition, wrapAtEOL, lineWidth - 1);
if (itEnd.GetInputDistance(it) == 0)
state.columnBegin = cursorPosition.x;
const auto textPositionBefore = state.text.data();
textBuffer.WriteLine(cursorPosition.y, wrapAtEOL, attributes, state);
const auto textPositionAfter = state.text.data();
if (state.columnBeginDirty != state.columnEndDirty)
{
// If we haven't written anything out because there wasn't enough space,
// we move the cursor to the end of the line so that it's forced to wrap.
cursorPosition.x = lineWidth;
// But if wrapping is disabled, we also need to move to the next string
// position, otherwise we'll be stuck in this loop forever.
if (!wrapAtEOL)
{
stringPosition++;
}
}
else
{
const auto cellCount = itEnd.GetCellDistance(it);
const auto changedRect = til::rect{ cursorPosition, til::size{ cellCount, 1 } };
const til::rect changedRect{ state.columnBeginDirty, cursorPosition.y, state.columnEndDirty, cursorPosition.y + 1 };
_api.NotifyAccessibilityChange(changedRect);
stringPosition += itEnd.GetInputDistance(it);
cursorPosition.x += cellCount;
}
if (cursorPosition.x >= lineWidth)
// If we're past the end of the line, we need to clamp the cursor
// back into range, and if wrapping is enabled, set the delayed wrap
// flag. The wrapping only occurs once another character is output.
const auto isWrapping = state.columnEnd >= state.columnLimit;
cursorPosition.x = isWrapping ? state.columnLimit - 1 : state.columnEnd;
cursor.SetPosition(cursorPosition);
if (isWrapping)
{
// If we're past the end of the line, we need to clamp the cursor
// back into range, and if wrapping is enabled, set the delayed wrap
// flag. The wrapping only occurs once another character is output.
cursorPosition.x = lineWidth - 1;
cursor.SetPosition(cursorPosition);
if (wrapAtEOL)
{
cursor.DelayEOLWrap();
}
}
else
{
cursor.SetPosition(cursorPosition);
else if (textPositionBefore == textPositionAfter)
{
// We want to wrap, but we're not allowed to and we failed to write even a single character into the row.
// This can only mean one thing! The DECAWM Autowrap mode is disabled ("\x1b[?7l") and we tried writing a
// wide glyph into the last column. ROW::Write() returns the lineWidth and leaves stringIterator untouched.
// To prevent a deadlock, because stringIterator never advances, we need to throw that glyph away.
textBuffer.ConsumeGrapheme(state.text);
}
}
}

View File

@@ -190,7 +190,7 @@ class RunLengthEncodingTests
// We're testing replace() elsewhere, but this is special:
// This ensures that even if we're default constructed we can add data.
rle.replace(0, 0, { 1, 5 });
rle.replace(0, 0, rle_type{ 1, 5 });
VERIFY_ARE_EQUAL(5u, rle.size());
VERIFY_IS_FALSE(rle.empty());
}

View File

@@ -38,31 +38,12 @@
<DisplayString>{{LT({Left}, {Top}) RB({Right}, {Bottom}) In:[{Right-Left+1} x {Bottom-Top+1}] Ex:[{Right-Left} x {Bottom-Top}]}}</DisplayString>
</Type>
<Type Name="CharRowCell">
<DisplayString Condition="_attr._glyphStored">Stored Glyph, go to UnicodeStorage.</DisplayString>
<DisplayString Condition="_attr._attribute == 0">{_wch,X} Single</DisplayString>
<DisplayString Condition="_attr._attribute == 1">{_wch,X} Lead</DisplayString>
<DisplayString Condition="_attr._attribute == 2">{_wch,X} Trail</DisplayString>
</Type>
<Type Name="ATTR_ROW">
<Expand>
<ExpandedItem>_data</ExpandedItem>
</Expand>
</Type>
<Type Name="CharRow">
<DisplayString>{{ wrap={_wrapForced} padded={_doubleBytePadded} }}</DisplayString>
<Expand>
<ExpandedItem>_data</ExpandedItem>
</Expand>
</Type>
<Type Name="ROW">
<DisplayString>{{ id={_id} width={_rowWidth} }}</DisplayString>
<DisplayString>{_chars.data(),[_charOffsets[_columnCount]]}</DisplayString>
<StringView>_chars.data(),[_charOffsets[_columnCount]]</StringView>
<Expand>
<Item Name="_charRow">_charRow</Item>
<Item Name="_attrRow">_attrRow</Item>
<Item Name="_chars">_chars.data(),[_charOffsets[_columnCount]]</Item>
<Item Name="_charOffsets">_charOffsets.data(),[_charOffsets.size()]</Item>
</Expand>
</Type>