mirror of
https://github.com/microsoft/terminal.git
synced 2025-12-19 18:11:39 -05:00
Clean up CodepointWidthDetector (#14396)
My long-term plan is to replace the `CodepointWidth` enum with a simple integer return value that indicates the number of columns a codepoint is wide. This is necessary so that we can return 0 for ZWJs (zero-width joiners).

This initial commit represents a cleanup effort around `CodepointWidthDetector`. Since less code runs faster, this change has the nice side effect of running roughly 5-10% faster across the board. It also drops the binary size by ~1.2kB.

## Validation Steps Performed

* `CodepointWidthDetectorTests` passes ✅
* U+26bf (``"`u{26bf}"`` inside pwsh) is a wide glyph in OpenConsole and a narrow one in Windows Terminal ✅
This commit is contained in:
@@ -20,8 +20,8 @@
|
||||
# significantly smaller, which would provide a performance win on the admittedly
|
||||
# extremely rare occasion that we should need to regenerate our table.
|
||||
#
|
||||
# Invoke as ./Generate-xxx ucd.nounihan.flat.xml -Pack | Out-File -Encoding
|
||||
# UTF-8 Temporary.cpp
|
||||
# Invoke this script from the root of this repository as:
|
||||
# .\tools\Generate-CodepointWidthsFromUCD.ps1 -Path .\path\to\ucd.nounihan.flat.xml -OverridePath .\src\types\unicode_width_overrides.xml -Pack
|
||||
#
|
||||
# [1]: https://www.unicode.org/Public/UCD/latest/ucdxml/
|
||||
# [2]: https://www.unicode.org/reports/tr42/
|
||||
@@ -43,15 +43,13 @@ Param(
|
||||
[string]$OverridePath = "overrides.xml",
|
||||
|
||||
[switch]$Pack, # Pack tightly based on width
|
||||
[switch]$NoOverrides, # Do not include overrides
|
||||
[switch]$Full = $False # Include Narrow codepoints
|
||||
[switch]$NoOverrides # Do not include overrides
|
||||
)
|
||||
|
||||
Enum CodepointWidth {
|
||||
Narrow;
|
||||
Wide;
|
||||
Ambiguous;
|
||||
Invalid;
|
||||
}
|
||||
|
||||
# UCD Functions {{{
|
||||
@@ -82,8 +80,8 @@ Function Get-UCDEntryWidth($entry) {
|
||||
"W" { [CodepointWidth]::Wide; Return }
|
||||
"F" { [CodepointWidth]::Wide; Return }
|
||||
"A" { [CodepointWidth]::Ambiguous; Return }
|
||||
default { throw "Unexpected East_Asian_Width property" }
|
||||
}
|
||||
[CodepointWidth]::Invalid
|
||||
}
|
||||
|
||||
Function Get-UCDEntryFlags($entry) {
|
||||
@@ -224,20 +222,10 @@ $UCDRepertoire = $InputObject.ucd.repertoire.ChildNodes | Sort-Object {
|
||||
}
|
||||
}
|
||||
|
||||
If (-not $Full) {
|
||||
$UCDRepertoire = $UCDRepertoire | Where-Object {
|
||||
# Select everything Wide/Ambiguous/Full OR Emoji w/ Emoji Presentation
|
||||
($_.ea -notin "N", "Na", "H") -or ($_.Emoji -eq "Y" -and $_.EPres -eq "Y")
|
||||
}
|
||||
}
|
||||
|
||||
$ranges = [UnicodeRangeList]::New(1024)
|
||||
|
||||
$c = 0
|
||||
ForEach($v in $UCDRepertoire) {
|
||||
$range = [UnicodeRange]::new($v)
|
||||
$c += $range.Length()
|
||||
|
||||
If ($ranges.Count -gt 0 -and $ranges[$ranges.Count - 1].Merge($range)) {
|
||||
# Merged into last entry
|
||||
Continue
|
||||
@@ -260,9 +248,16 @@ If (-not $NoOverrides) {
|
||||
}
|
||||
}
|
||||
|
||||
$ranges.RemoveAll({ $args[0].Width -eq [CodepointWidth]::Narrow }) | Out-Null
|
||||
|
||||
$c = 0
|
||||
ForEach($_ in $ranges) {
|
||||
$c += $_.End - $_.Start + 1
|
||||
}
|
||||
|
||||
# Emit Code
|
||||
" // Generated by {0} -Pack:{1} -Full:{2} -NoOverrides:{3}" -f $MyInvocation.MyCommand.Name, $Pack, $Full, $NoOverrides
|
||||
" // on {0} (UTC) from {1}." -f (Get-Date -AsUTC), $InputObject.ucd.description
|
||||
" // on {0} from {1}." -f (Get-Date -AsUTC -Format "u"), $InputObject.ucd.description
|
||||
" // {0} (0x{0:X}) codepoints covered." -f $c
|
||||
If (-not $NoOverrides) {
|
||||
" // {0} (0x{0:X}) codepoints overridden." -f $overrideCount
|
||||
@@ -270,11 +265,12 @@ If (-not $NoOverrides) {
|
||||
}
|
||||
" static constexpr std::array<UnicodeRange, {0}> s_wideAndAmbiguousTable{{" -f $ranges.Count
|
||||
ForEach($_ in $ranges) {
|
||||
$isAmbiguous = $_.Width -eq [CodepointWidth]::Ambiguous
|
||||
$comment = ""
|
||||
if ($null -ne $_.Comment) {
|
||||
# We only vend comments when we aren't packing tightly
|
||||
$comment = " // {0}" -f $_.Comment
|
||||
}
|
||||
" UnicodeRange{{ 0x{0:x}, 0x{1:x}, CodepointWidth::{2} }},{3}" -f $_.Start, $_.End, $_.Width, $comment
|
||||
" UnicodeRange{{ 0x{0:x}, 0x{1:x}, {2} }},{3}" -f $_.Start, $_.End, [int]$isAmbiguous, $comment
|
||||
}
|
||||
" };"
|
||||
|
||||
Reference in New Issue
Block a user