Implement grapheme clusters (#16916)

First, this adds `GraphemeTableGen` which
* parses `ucd.nounihan.grouped.xml`
* computes the cluster break property for each codepoint
* computes the East Asian Width property for each codepoint
* compresses everything into a 4-stage trie
* computes a LUT of cluster break rules between 2 codepoints
* and serializes everything to C++ tables and helper functions

Next, this adds `GraphemeTestTableGen` which
* parses `GraphemeBreakTest.txt`
* splits each test into graphemes and break opportunities
* and serializes everything to a C++ table for use as unit tests

`CodepointWidthDetector.cpp` was rewritten from scratch to
* use an iterator struct (`GraphemeState`) to maintain state
* accumulate codepoints until a break opportunity arises
* accumulate the total width of a grapheme
* support 3 different measurement modes: Grapheme clusters,
  `wcswidth`-style, and a mode identical to the old conhost

With this in place the following changes were made:
* `ROW::WriteHelper::_replaceTextUnicode` now uses the new
  grapheme cluster text iterators
* The same function was modified to join new text with existing
  contents of the current cell if they join to form a cluster
* Otherwise, a ton of places were modified to funnel the selection
  of the measurement mode over from WT's settings to ConPTY

This is part of #1472

## Validation Steps Performed
* So many tests 
* https://github.com/apparebit/demicode works fantastic 
* UTF8-torture-test.txt works fantastic 
This commit is contained in:
Leonard Hecker 2024-06-26 20:40:27 +02:00 committed by GitHub
parent 174dcb9091
commit cb48babe9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
54 changed files with 3817 additions and 750 deletions

View File

@ -146,6 +146,7 @@ bytebuffer
cac
cacafire
CALLCONV
CANDRABINDU
capslock
CARETBLINKINGENABLED
CARRIAGERETURN
@ -156,6 +157,7 @@ CBash
cbiex
CBN
cbt
Ccc
CCCBB
cch
CCHAR
@ -293,7 +295,6 @@ CREATESTRUCT
CREATESTRUCTW
createvpack
crisman
CRLFs
crloew
CRTLIBS
csbi
@ -594,6 +595,7 @@ fesb
FFAF
ffd
FFDE
FFFD
FFFDb
fgbg
FGCOLOR
@ -614,6 +616,7 @@ FINDREGEX
FINDSTRINGEXACT
FINDUP
FIter
FITZPATRICK
FIXEDFILEINFO
Flg
flyouts
@ -882,10 +885,12 @@ jconcpp
JLO
JOBOBJECT
JOBOBJECTINFOCLASS
JONGSEONG
JPN
jsoncpp
jsprovider
jumplist
JUNGSEONG
KAttrs
kawa
Kazu
@ -904,6 +909,7 @@ keyups
KILLACTIVE
KILLFOCUS
kinda
KIYEOK
KLF
KLMNO
KLMNOPQRST
@ -1013,6 +1019,7 @@ luma
lval
LVB
LVERTICAL
LVT
LWA
LWIN
lwkmvj
@ -1209,6 +1216,7 @@ ntuser
NTVDM
ntverp
nugetversions
NUKTA
nullness
nullonfailure
nullopts
@ -1471,7 +1479,6 @@ READMODE
rectread
redef
redefinable
Redir
redist
REDSCROLL
REFCLSID
@ -1489,6 +1496,7 @@ renderengine
rendersize
reparented
reparenting
REPH
replatformed
Replymessage
reportfileaccesses
@ -1519,6 +1527,7 @@ rgw
RIGHTALIGN
RIGHTBUTTON
riid
ris
RIS
roadmap
robomac
@ -1924,6 +1933,7 @@ vga
vgaoem
viewkind
viewports
VIRAMA
Virt
VIRTTERM
vkey
@ -1974,8 +1984,8 @@ wchars
WCIA
WCIW
WCSHELPER
wcsicmp
wcsrev
wcswidth
wddm
wddmcon
WDDMCONSOLECONTEXT
@ -2131,6 +2141,7 @@ XFORM
XIn
XManifest
XMath
XNamespace
xorg
XPan
XResource
@ -2162,6 +2173,7 @@ Zabcdefghijklmn
Zabcdefghijklmnopqrstuvwxyz
ZCmd
ZCtrl
ZWJs
zxcvbnm
ZYXWVU
ZYXWVUTd

View File

@ -5,10 +5,8 @@
#include "Row.hpp"
#include <isa_availability.h>
#include <til/unicode.h>
#include "textBuffer.hpp"
#include "../../types/inc/GlyphWidth.hpp"
#include "../../types/inc/CodepointWidthDetector.hpp"
// It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting
// performance (including Debug performance). Other languages are a little bit more ergonomic there than C++.
@ -568,6 +566,7 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp
void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars)
try
{
assert(width >= 1 && width <= 2);
WriteHelper h{ *this, columnBegin, _columnCount, chars };
if (!h.IsValid())
{
@ -666,56 +665,91 @@ catch (...)
[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept
{
const auto end = chars.end();
auto& cwd = CodepointWidthDetector::Singleton();
while (it != end)
// Check if the new text joins with the existing contents of the row to form a single grapheme cluster.
if (it == chars.begin())
{
unsigned int width = 1;
auto ptr = &*it;
const auto wch = *ptr;
size_t advance = 1;
++it;
// Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII.
// It also allows us to skip the surrogate pair decoding at the same time.
if (wch >= 0x80)
auto colPrev = colBeg;
while (colPrev > 0 && row._uncheckedIsTrailer(--colPrev))
{
if (til::is_surrogate(wch))
}
const auto chPrev = row._uncheckedCharOffset(colPrev);
const std::wstring_view charsPrev{ row._chars.data() + chPrev, ch - chPrev };
GraphemeState state;
cwd.GraphemeNext(state, charsPrev);
cwd.GraphemeNext(state, chars);
if (state.len > 0)
{
colBegDirty = colPrev;
colEnd = colPrev;
const auto width = std::max(1, state.width);
const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width);
if (colEndNew > colLimit)
{
if (it != end && til::is_leading_surrogate(wch) && til::is_trailing_surrogate(*it))
{
advance = 2;
++it;
}
else
{
ptr = &UNICODE_REPLACEMENT;
}
colEndDirty = colLimit;
charsConsumed = ch - chBeg;
return;
}
width = IsGlyphFullWidth({ ptr, advance }) + 1u;
}
// Fill our char-offset buffer with 1 entry containing the mapping from the
// current column (colEnd) to the start of the glyph in the string (ch)...
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev);
// ...followed by 0-N entries containing an indication that the
// columns are just a wide-glyph extension of the preceding one.
while (colEnd < colEndNew)
{
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev | CharOffsetsTrailer);
}
const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width);
if (colEndNew > colLimit)
ch += state.len;
it += state.len;
}
}
else
{
// The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â".
// In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character
// and let GraphemeNext() find the next proper grapheme boundary.
--colEnd;
--ch;
--it;
}
if (const auto end = chars.end(); it != end)
{
GraphemeState state{ .beg = &*it };
do
{
colEndDirty = colLimit;
charsConsumed = ch - chBeg;
return;
}
cwd.GraphemeNext(state, chars);
// Fill our char-offset buffer with 1 entry containing the mapping from the
// current column (colEnd) to the start of the glyph in the string (ch)...
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch);
// ...followed by 0-N entries containing an indication that the
// columns are just a wide-glyph extension of the preceding one.
while (colEnd < colEndNew)
{
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer);
}
const auto width = std::max(1, state.width);
const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width);
if (colEndNew > colLimit)
{
colEndDirty = colLimit;
charsConsumed = ch - chBeg;
return;
}
ch += advance;
// Fill our char-offset buffer with 1 entry containing the mapping from the
// current column (colEnd) to the start of the glyph in the string (ch)...
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch);
// ...followed by 0-N entries containing an indication that the
// columns are just a wide-glyph extension of the preceding one.
while (colEnd < colEndNew)
{
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer);
}
ch += state.len;
it += state.len;
} while (it != end);
}
colEndDirty = colEnd;
@ -1058,7 +1092,7 @@ std::wstring_view ROW::GetText() const noexcept
std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept
{
const til::CoordType columns = _columnCount;
const auto columns = GetReadableColumnCount();
const auto colBeg = clamp(columnBegin, 0, columns);
const auto colEnd = clamp(columnEnd, colBeg, columns);
const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast<size_t>(colBeg));

View File

@ -2,14 +2,12 @@
// Licensed under the MIT license.
#include "precomp.h"
#include "textBuffer.hpp"
#include <til/hash.h>
#include <til/unicode.h>
#include "UTextAdapter.h"
#include "../../types/inc/GlyphWidth.hpp"
#include "../../types/inc/CodepointWidthDetector.hpp"
#include "../renderer/base/renderer.hpp"
#include "../types/inc/utils.hpp"
#include "search.h"
@ -376,17 +374,23 @@ TextBufferCellIterator TextBuffer::GetCellDataAt(const til::point at, const View
// Given the character offset `position` in the `chars` string, this function returns the starting position of the next grapheme.
// For instance, given a `chars` of L"x\uD83D\uDE42y" and a `position` of 1 it'll return 3.
// GraphemePrev would do the exact inverse of this operation.
// In the future, these functions are expected to also deliver information about how many columns a grapheme occupies.
// (I know that mere UTF-16 code point iteration doesn't handle graphemes, but that's what we're working towards.)
size_t TextBuffer::GraphemeNext(const std::wstring_view& chars, size_t position) noexcept
{
return til::utf16_iterate_next(chars, position);
auto& cwd = CodepointWidthDetector::Singleton();
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
GraphemeState state{ .beg = chars.data() + position };
cwd.GraphemeNext(state, chars);
return position + state.len;
}
// It's the counterpart to GraphemeNext. See GraphemeNext.
size_t TextBuffer::GraphemePrev(const std::wstring_view& chars, size_t position) noexcept
{
return til::utf16_iterate_prev(chars, position);
auto& cwd = CodepointWidthDetector::Singleton();
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
GraphemeState state{ .beg = chars.data() + position };
cwd.GraphemePrev(state, chars);
return position - state.len;
}
// Ever wondered how much space a piece of text needs before inserting it? This function will tell you!
@ -413,7 +417,7 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord
{
}
const auto dist = gsl::narrow_cast<size_t>(it - beg);
auto dist = gsl::narrow_cast<size_t>(it - beg);
auto col = gsl::narrow_cast<til::CoordType>(dist);
if (it == asciiEnd) [[likely]]
@ -423,33 +427,26 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord
}
// Unicode slow-path where we need to count text and columns separately.
for (;;)
auto& cwd = CodepointWidthDetector::Singleton();
const auto len = chars.size();
// The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â".
// In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character
// and let GraphemeNext() find the next proper grapheme boundary.
if (dist != 0)
{
auto ptr = &*it;
const auto wch = *ptr;
size_t len = 1;
dist--;
col--;
}
col++;
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
GraphemeState state{ .beg = chars.data() + dist };
// Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII.
// It also allows us to skip the surrogate pair decoding at the same time.
if (wch >= 0x80)
{
if (til::is_surrogate(wch))
{
const auto it2 = it + 1;
if (til::is_leading_surrogate(wch) && it2 != end && til::is_trailing_surrogate(*it2))
{
len = 2;
}
else
{
ptr = &UNICODE_REPLACEMENT;
}
}
col += IsGlyphFullWidth({ ptr, len });
}
while (dist < len)
{
cwd.GraphemeNext(state, chars);
dist += state.len;
col += state.width;
// If we ran out of columns, we need to always return `columnLimit` and not `cols`,
// because if we tried inserting a wide glyph into just 1 remaining column it will
@ -458,17 +455,13 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord
if (col > columnLimit)
{
columns = columnLimit;
return gsl::narrow_cast<size_t>(it - beg);
}
// But if we simply ran out of text we just need to return the actual number of columns.
it += len;
if (it == end)
{
columns = col;
return chars.size();
return dist;
}
}
// But if we simply ran out of text we just need to return the actual number of columns.
columns = col;
return chars.size();
}
// Pretend as if `position` is a regular cursor in the TextBuffer.

View File

@ -4,18 +4,11 @@
#include "pch.h"
#include "TerminalPage.h"
#include "TerminalPage.g.cpp"
#include "RenameWindowRequestedArgs.g.cpp"
#include "RequestMoveContentArgs.g.cpp"
#include "RequestReceiveContentArgs.g.cpp"
#include "LaunchPositionRequest.g.cpp"
#include <filesystem>
#include <inc/WindowingBehavior.h>
#include <LibraryResources.h>
#include <TerminalCore/ControlKeyStates.hpp>
#include <til/latch.h>
#include <Utils.h>
#include "../../types/inc/utils.hpp"
#include "App.h"
@ -24,7 +17,12 @@
#include "SettingsPaneContent.h"
#include "ScratchpadContent.h"
#include "TabRowControl.h"
#include "Utils.h"
#include "TerminalPage.g.cpp"
#include "RenameWindowRequestedArgs.g.cpp"
#include "RequestMoveContentArgs.g.cpp"
#include "RequestReceiveContentArgs.g.cpp"
#include "LaunchPositionRequest.g.cpp"
using namespace winrt;
using namespace winrt::Microsoft::Terminal::Control;
@ -1258,6 +1256,20 @@ namespace winrt::TerminalApp::implementation
TerminalSettings settings,
const bool inheritCursor)
{
// Translates the global TextMeasurement setting into the string that is later
// handed to the connection via the "textMeasurement" ValueSet entry.
// Because this is a function-local static, the lambda runs only the first time
// this function is entered; settings changes made afterwards are not picked up
// until the process restarts.
// An empty view (unknown enum value) means "don't pass anything along".
// NOTE(review): the strings presumably have to match the ones the connection
// compares against ("graphemes" / "wcswidth" / "console") — keep them in sync.
static const auto textMeasurement = [&]() -> std::wstring_view {
switch (_settings.GlobalSettings().TextMeasurement())
{
case TextMeasurement::Graphemes:
return L"graphemes";
case TextMeasurement::Wcswidth:
return L"wcswidth";
case TextMeasurement::Console:
return L"console";
default:
return {};
}
}();
TerminalConnection::ITerminalConnection connection{ nullptr };
auto connectionType = profile.ConnectionType();
@ -1329,6 +1341,11 @@ namespace winrt::TerminalApp::implementation
}
}
if (!textMeasurement.empty())
{
valueSet.Insert(L"textMeasurement", Windows::Foundation::PropertyValue::CreateString(textMeasurement));
}
if (const auto id = settings.SessionId(); id != winrt::guid{})
{
valueSet.Insert(L"sessionId", Windows::Foundation::PropertyValue::CreateGuid(id));

View File

@ -5,7 +5,6 @@
#include "ConptyConnection.h"
#include <conpty-static.h>
#include <til/string.h>
#include <winternl.h>
#include "CTerminalHandoff.h"
@ -259,11 +258,39 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation
_cols = unbox_prop_or<uint32_t>(settings, L"initialCols", _cols);
_sessionId = unbox_prop_or<winrt::guid>(settings, L"sessionId", _sessionId);
_environment = settings.TryLookup(L"environment").try_as<Windows::Foundation::Collections::ValueSet>();
_inheritCursor = unbox_prop_or<bool>(settings, L"inheritCursor", _inheritCursor);
_profileGuid = unbox_prop_or<winrt::guid>(settings, L"profileGuid", _profileGuid);
const auto& initialEnvironment{ unbox_prop_or<winrt::hstring>(settings, L"initialEnvironment", L"") };
_flags = PSEUDOCONSOLE_RESIZE_QUIRK;
// If we're using an existing buffer, we want the new connection
// to reuse the existing cursor. When not setting this flag, the
// PseudoConsole sends a clear screen VT code which our renderer
// interprets into making all the previous lines be outside the
// current viewport.
const auto inheritCursor = unbox_prop_or<bool>(settings, L"inheritCursor", false);
if (inheritCursor)
{
_flags |= PSEUDOCONSOLE_INHERIT_CURSOR;
}
const auto textMeasurement = unbox_prop_or<winrt::hstring>(settings, L"textMeasurement", winrt::hstring{});
if (!textMeasurement.empty())
{
if (textMeasurement == L"graphemes")
{
_flags |= PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES;
}
else if (textMeasurement == L"wcswidth")
{
_flags |= PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH;
}
else if (textMeasurement == L"console")
{
_flags |= PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE;
}
}
const auto& initialEnvironment{ unbox_prop_or<winrt::hstring>(settings, L"initialEnvironment", L"") };
const bool reloadEnvironmentVariables = unbox_prop_or<bool>(settings, L"reloadEnvironmentVariables", false);
if (reloadEnvironmentVariables)
@ -318,19 +345,7 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation
// handoff from an already-started PTY process.
if (!_inPipe)
{
DWORD flags = PSEUDOCONSOLE_RESIZE_QUIRK;
// If we're using an existing buffer, we want the new connection
// to reuse the existing cursor. When not setting this flag, the
// PseudoConsole sends a clear screen VT code which our renderer
// interprets into making all the previous lines be outside the
// current viewport.
if (_inheritCursor)
{
flags |= PSEUDOCONSOLE_INHERIT_CURSOR;
}
THROW_IF_FAILED(_CreatePseudoConsoleAndPipes(til::unwrap_coord_size(dimensions), flags, &_inPipe, &_outPipe, &_hPC));
THROW_IF_FAILED(_CreatePseudoConsoleAndPipes(til::unwrap_coord_size(dimensions), _flags, &_inPipe, &_outPipe, &_hPC));
if (_initialParentHwnd != 0)
{

View File

@ -90,7 +90,7 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation
til::u8state _u8State{};
std::wstring _u16Str{};
std::array<char, 4096> _buffer{};
bool _inheritCursor{ false };
DWORD _flags{ 0 };
til::env _initialEnv{};
guid _profileGuid{};

View File

@ -18,6 +18,7 @@
#include "../../renderer/atlas/AtlasEngine.h"
#include "../../renderer/base/renderer.hpp"
#include "../../renderer/uia/UiaRenderer.hpp"
#include "../../types/inc/CodepointWidthDetector.hpp"
#include "ControlCore.g.cpp"
#include "SelectionColor.g.cpp"
@ -71,6 +72,23 @@ namespace winrt::Microsoft::Terminal::Control::implementation
_desiredFont{ DEFAULT_FONT_FACE, 0, DEFAULT_FONT_WEIGHT, DEFAULT_FONT_SIZE, CP_UTF8 },
_actualFont{ DEFAULT_FONT_FACE, 0, DEFAULT_FONT_WEIGHT, { 0, DEFAULT_FONT_SIZE }, CP_UTF8, false }
{
// One-time initialization of the CodepointWidthDetector singleton.
// The function-local static guarantees the lambda body runs only the first time
// this constructor is executed, so the measurement mode is taken from the
// settings of that first instance and is not re-applied for later ones.
// The returned bool is unused; it only exists so the static has a value to hold.
static const auto textMeasurementInit = [&]() {
// Graphemes is the fallback for any setting value not handled below.
TextMeasurementMode mode = TextMeasurementMode::Graphemes;
switch (settings.TextMeasurement())
{
case TextMeasurement::Wcswidth:
mode = TextMeasurementMode::Wcswidth;
break;
case TextMeasurement::Console:
mode = TextMeasurementMode::Console;
break;
default:
break;
}
CodepointWidthDetector::Singleton().Reset(mode);
return true;
}();
_settings = winrt::make_self<implementation::ControlSettings>(settings, unfocusedAppearance);
_terminal = std::make_shared<::Microsoft::Terminal::Core::Terminal>();
const auto lock = _terminal->LockForWriting();

View File

@ -18,6 +18,13 @@ namespace Microsoft.Terminal.Control
Direct3D11,
};
// Selects how incoming text is grouped into cells / measured.
// Graphemes is listed first and therefore has the value 0.
enum TextMeasurement
{
// Measure by grapheme clusters.
Graphemes,
// Measure in a wcswidth-style manner.
Wcswidth,
// Measure the way the Windows console used to.
Console,
};
runtimeclass FontSizeChangedArgs
{
Int32 Width { get; };

View File

@ -62,6 +62,7 @@ namespace Microsoft.Terminal.Control
Microsoft.Terminal.Control.GraphicsAPI GraphicsAPI { get; };
Boolean DisablePartialInvalidation { get; };
Boolean SoftwareRendering { get; };
Microsoft.Terminal.Control.TextMeasurement TextMeasurement { get; };
Boolean ShowMarks { get; };
Boolean UseBackgroundImageForWindow { get; };
Boolean RightClickContextMenu { get; };

View File

@ -41,5 +41,13 @@
<ToggleSwitch IsOn="{x:Bind ViewModel.SoftwareRendering, Mode=TwoWay}"
Style="{StaticResource ToggleSwitchInExpanderStyle}" />
</local:SettingContainer>
<local:SettingContainer x:Uid="Globals_TextMeasurement">
<ComboBox AutomationProperties.AccessibilityView="Content"
ItemTemplate="{StaticResource EnumComboBoxTemplate}"
ItemsSource="{x:Bind ViewModel.TextMeasurementList}"
SelectedItem="{x:Bind ViewModel.CurrentTextMeasurement, Mode=TwoWay}"
Style="{StaticResource ComboBoxSettingStyle}" />
</local:SettingContainer>
</StackPanel>
</Page>

View File

@ -17,5 +17,6 @@ namespace winrt::Microsoft::Terminal::Settings::Editor::implementation
_settings{ std::move(settings) }
{
INITIALIZE_BINDABLE_ENUM_SETTING(GraphicsAPI, GraphicsAPI, winrt::Microsoft::Terminal::Control::GraphicsAPI, L"Globals_GraphicsAPI_", L"Text");
INITIALIZE_BINDABLE_ENUM_SETTING(TextMeasurement, TextMeasurement, winrt::Microsoft::Terminal::Control::TextMeasurement, L"Globals_TextMeasurement_", L"Text");
}
}

View File

@ -16,6 +16,7 @@ namespace winrt::Microsoft::Terminal::Settings::Editor::implementation
GETSET_BINDABLE_ENUM_SETTING(GraphicsAPI, winrt::Microsoft::Terminal::Control::GraphicsAPI, _settings.GlobalSettings().GraphicsAPI);
PERMANENT_OBSERVABLE_PROJECTED_SETTING(_settings.GlobalSettings(), DisablePartialInvalidation);
PERMANENT_OBSERVABLE_PROJECTED_SETTING(_settings.GlobalSettings(), SoftwareRendering);
GETSET_BINDABLE_ENUM_SETTING(TextMeasurement, winrt::Microsoft::Terminal::Control::TextMeasurement, _settings.GlobalSettings().TextMeasurement);
private:
Model::CascadiaSettings _settings{ nullptr };

View File

@ -15,5 +15,7 @@ namespace Microsoft.Terminal.Settings.Editor
Windows.Foundation.Collections.IObservableVector<Microsoft.Terminal.Settings.Editor.EnumEntry> GraphicsAPIList { get; };
PERMANENT_OBSERVABLE_PROJECTED_SETTING(Boolean, DisablePartialInvalidation);
PERMANENT_OBSERVABLE_PROJECTED_SETTING(Boolean, SoftwareRendering);
IInspectable CurrentTextMeasurement;
Windows.Foundation.Collections.IObservableVector<Microsoft.Terminal.Settings.Editor.EnumEntry> TextMeasurementList { get; };
}
}

View File

@ -342,6 +342,24 @@
<value>When enabled, the terminal will use a software rasterizer (WARP). This setting should be left disabled under almost all circumstances.</value>
<comment>{Locked="WARP"} WARP is the "Windows Advanced Rasterization Platform".</comment>
</data>
<data name="Globals_TextMeasurement.Header" xml:space="preserve">
<value>Text measurement mode</value>
<comment>This text is shown next to a list of choices.</comment>
</data>
<data name="Globals_TextMeasurement.HelpText" xml:space="preserve">
<value>This changes the way incoming text is grouped into cells. The "Grapheme clusters" option is the most modern and Unicode-correct way to do so, while "wcswidth" is a common approach on UNIX, and "Windows Console" replicates the way it used to work on Windows. Changing this setting requires a restart of Windows Terminal and it only applies to applications launched from within it.</value>
</data>
<data name="Globals_TextMeasurement_Graphemes.Text" xml:space="preserve">
<value>Grapheme clusters</value>
<comment>The default choice between multiple text measurement modes.</comment>
</data>
<data name="Globals_TextMeasurement_Wcswidth.Text" xml:space="preserve">
<value>wcswidth</value>
<comment>{Locked="wcswidth"}</comment>
</data>
<data name="Globals_TextMeasurement_Console.Text" xml:space="preserve">
<value>Windows Console</value>
</data>
<data name="Globals_InitialCols.Text" xml:space="preserve">
<value>Columns</value>
<comment>Header for a control to choose the number of columns in the terminal's text grid.</comment>

View File

@ -40,6 +40,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
DEFINE_ENUM_MAP(Model::WindowingMode, WindowingMode);
DEFINE_ENUM_MAP(Microsoft::Terminal::Core::MatchMode, MatchMode);
DEFINE_ENUM_MAP(Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI);
DEFINE_ENUM_MAP(Microsoft::Terminal::Control::TextMeasurement, TextMeasurement);
// Profile Settings
DEFINE_ENUM_MAP(Model::CloseOnExitMode, CloseOnExitMode);

View File

@ -36,6 +36,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
static winrt::Windows::Foundation::Collections::IMap<winrt::hstring, WindowingMode> WindowingMode();
static winrt::Windows::Foundation::Collections::IMap<winrt::hstring, winrt::Microsoft::Terminal::Core::MatchMode> MatchMode();
static winrt::Windows::Foundation::Collections::IMap<winrt::hstring, winrt::Microsoft::Terminal::Control::GraphicsAPI> GraphicsAPI();
static winrt::Windows::Foundation::Collections::IMap<winrt::hstring, winrt::Microsoft::Terminal::Control::TextMeasurement> TextMeasurement();
// Profile Settings
static winrt::Windows::Foundation::Collections::IMap<winrt::hstring, CloseOnExitMode> CloseOnExitMode();

View File

@ -18,6 +18,7 @@ namespace Microsoft.Terminal.Settings.Model
static Windows.Foundation.Collections.IMap<String, Microsoft.Terminal.Settings.Model.WindowingMode> WindowingMode { get; };
static Windows.Foundation.Collections.IMap<String, Microsoft.Terminal.Core.MatchMode> MatchMode { get; };
static Windows.Foundation.Collections.IMap<String, Microsoft.Terminal.Control.GraphicsAPI> GraphicsAPI { get; };
static Windows.Foundation.Collections.IMap<String, Microsoft.Terminal.Control.TextMeasurement> TextMeasurement { get; };
// Profile Settings
static Windows.Foundation.Collections.IMap<String, Microsoft.Terminal.Settings.Model.CloseOnExitMode> CloseOnExitMode { get; };

View File

@ -243,6 +243,11 @@ Json::Value GlobalAppSettings::ToJson()
{
_GraphicsAPI.reset();
}
if (_TextMeasurement == Control::TextMeasurement::Graphemes)
{
_TextMeasurement.reset();
}
if (_DisablePartialInvalidation == false)
{
_DisablePartialInvalidation.reset();

View File

@ -79,6 +79,7 @@ namespace Microsoft.Terminal.Settings.Model
INHERITABLE_SETTING(Microsoft.Terminal.Control.GraphicsAPI, GraphicsAPI);
INHERITABLE_SETTING(Boolean, DisablePartialInvalidation);
INHERITABLE_SETTING(Boolean, SoftwareRendering);
INHERITABLE_SETTING(Microsoft.Terminal.Control.TextMeasurement, TextMeasurement);
INHERITABLE_SETTING(Boolean, UseBackgroundImageForWindow);
INHERITABLE_SETTING(Boolean, ForceVTInput);
INHERITABLE_SETTING(Boolean, DebugFeaturesEnabled);

View File

@ -27,6 +27,7 @@ Author(s):
X(winrt::Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI, "rendering.graphicsAPI") \
X(bool, DisablePartialInvalidation, "rendering.disablePartialInvalidation", false) \
X(bool, SoftwareRendering, "rendering.software", false) \
X(winrt::Microsoft::Terminal::Control::TextMeasurement, TextMeasurement, "compatibility.textMeasurement") \
X(bool, UseBackgroundImageForWindow, "experimental.useBackgroundImageForWindow", false) \
X(bool, ForceVTInput, "experimental.input.forceVT", false) \
X(bool, TrimBlockSelection, "trimBlockSelection", true) \

View File

@ -366,6 +366,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
_GraphicsAPI = globalSettings.GraphicsAPI();
_DisablePartialInvalidation = globalSettings.DisablePartialInvalidation();
_SoftwareRendering = globalSettings.SoftwareRendering();
_TextMeasurement = globalSettings.TextMeasurement();
_UseBackgroundImageForWindow = globalSettings.UseBackgroundImageForWindow();
_ForceVTInput = globalSettings.ForceVTInput();
_TrimBlockSelection = globalSettings.TrimBlockSelection();

View File

@ -159,6 +159,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
INHERITABLE_SETTING(Model::TerminalSettings, Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI);
INHERITABLE_SETTING(Model::TerminalSettings, bool, DisablePartialInvalidation, false);
INHERITABLE_SETTING(Model::TerminalSettings, bool, SoftwareRendering, false);
INHERITABLE_SETTING(Model::TerminalSettings, Microsoft::Terminal::Control::TextMeasurement, TextMeasurement);
INHERITABLE_SETTING(Model::TerminalSettings, bool, UseBackgroundImageForWindow, false);
INHERITABLE_SETTING(Model::TerminalSettings, bool, ForceVTInput, false);

View File

@ -771,3 +771,12 @@ JSON_ENUM_MAPPER(::winrt::Microsoft::Terminal::Control::GraphicsAPI)
pair_type{ "direct3d11", ValueType::Direct3D11 },
};
};
// JSON (de)serialization mapping for the TextMeasurement enum:
// each pair associates the string used in the settings file with its enum value.
// The count passed to JSON_MAPPINGS must equal the number of pairs listed.
JSON_ENUM_MAPPER(::winrt::Microsoft::Terminal::Control::TextMeasurement)
{
JSON_MAPPINGS(3) = {
pair_type{ "graphemes", ValueType::Graphemes },
pair_type{ "wcswidth", ValueType::Wcswidth },
pair_type{ "console", ValueType::Console },
};
};

View File

@ -77,6 +77,7 @@
X(winrt::Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI) \
X(bool, DisablePartialInvalidation, false) \
X(bool, SoftwareRendering, false) \
X(winrt::Microsoft::Terminal::Control::TextMeasurement, TextMeasurement) \
X(bool, UseBackgroundImageForWindow, false) \
X(bool, ShowMarks, false) \
X(winrt::Microsoft::Terminal::Control::CopyFormat, CopyFormatting, 0) \

View File

@ -23,6 +23,7 @@ const std::wstring_view ConsoleArguments::RESIZE_QUIRK = L"--resizeQuirk";
const std::wstring_view ConsoleArguments::FEATURE_ARG = L"--feature";
const std::wstring_view ConsoleArguments::FEATURE_PTY_ARG = L"pty";
const std::wstring_view ConsoleArguments::COM_SERVER_ARG = L"-Embedding";
// Command line switch whose value selects the text measurement mode.
// NOTE(review): the identifier says "glyph width" but the switch itself is
// spelled "--textMeasurement"; search for both when looking for usages.
static constexpr std::wstring_view GLYPH_WIDTH{ L"--textMeasurement" };
// NOTE: Thinking about adding more commandline args that control conpty, for
// the Terminal? Make sure you add them to the commandline in
// ConsoleEstablishHandoff. We use that to initialize the ConsoleArguments for a
@ -507,6 +508,10 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector<std::wstring>& args, _In
s_ConsumeArg(args, i);
hr = S_OK;
}
else if (arg == GLYPH_WIDTH)
{
hr = s_GetArgumentValue(args, i, &_textMeasurement);
}
else if (arg == CLIENT_COMMANDLINE_ARG)
{
// Everything after this is the explicit commandline
@ -630,6 +635,11 @@ std::wstring ConsoleArguments::GetVtMode() const
return _vtMode;
}
// Returns the stored text measurement argument value (_textMeasurement) as-is;
// no validation or normalization happens here.
// The result is a reference to a member, so it must not outlive this object.
// NOTE(review): presumably empty when the argument was not supplied — confirm
// against the constructor, which is not visible here.
const std::wstring& ConsoleArguments::GetTextMeasurement() const
{
return _textMeasurement;
}
bool ConsoleArguments::GetForceV1() const
{
return _forceV1;

View File

@ -47,6 +47,7 @@ public:
std::wstring GetOriginalCommandLine() const;
std::wstring GetClientCommandline() const;
std::wstring GetVtMode() const;
const std::wstring& GetTextMeasurement() const;
bool GetForceV1() const;
bool GetForceNoHandoff() const;
@ -123,6 +124,7 @@ private:
HANDLE _vtOutHandle;
std::wstring _vtMode;
std::wstring _textMeasurement;
bool _forceNoHandoff;
bool _forceV1;

View File

@ -9,6 +9,7 @@
#include "../renderer/vt/Xterm256Engine.hpp"
#include "../renderer/base/renderer.hpp"
#include "../types/inc/CodepointWidthDetector.hpp"
#include "../types/inc/utils.hpp"
#include "handle.h" // LockConsole
#include "input.h" // ProcessCtrlEvents
@ -73,6 +74,28 @@ VtIo::VtIo() :
// If we were already given VT handles, set up the VT IO engine to use those.
if (pArgs->InConptyMode())
{
// Honestly, no idea where else to put this.
if (const auto& textMeasurement = pArgs->GetTextMeasurement(); !textMeasurement.empty())
{
auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation();
SettingsTextMeasurementMode settingsMode = SettingsTextMeasurementMode::Graphemes;
TextMeasurementMode mode = TextMeasurementMode::Graphemes;
if (textMeasurement == L"wcswidth")
{
settingsMode = SettingsTextMeasurementMode::Wcswidth;
mode = TextMeasurementMode::Wcswidth;
}
else if (textMeasurement == L"console")
{
settingsMode = SettingsTextMeasurementMode::Console;
mode = TextMeasurementMode::Console;
}
gci.SetTextMeasurementMode(settingsMode);
CodepointWidthDetector::Singleton().Reset(mode);
}
return _Initialize(pArgs->GetVtInHandle(), pArgs->GetVtOutHandle(), pArgs->GetVtMode(), pArgs->GetSignalHandle());
}
// Didn't need to initialize if we didn't have VT stuff. It's still OK, but report we did nothing.

View File

@ -2,24 +2,13 @@
// Licensed under the MIT license.
#include "precomp.h"
#include "screenInfo.hpp"
#include "dbcs.h"
#include "output.h"
#include "_output.h"
#include "misc.h"
#include "handle.h"
#include <cmath>
#include "../interactivity/inc/ServiceLocator.hpp"
#include "../types/inc/Viewport.hpp"
#include "../types/inc/GlyphWidth.hpp"
#include "../terminal/parser/OutputStateMachineEngine.hpp"
#include "../types/inc/CodepointWidthDetector.hpp"
#include "../types/inc/convert.hpp"
#pragma hdrstop
using namespace Microsoft::Console;
using namespace Microsoft::Console::Types;
using namespace Microsoft::Console::Render;
@ -524,15 +513,30 @@ void SCREEN_INFORMATION::RefreshFontWithRenderer()
{
if (IsActiveScreenBuffer())
{
// Hand the handle to our internal structure to the font change trigger in case it updates it based on what's appropriate.
if (ServiceLocator::LocateGlobals().pRender != nullptr)
{
ServiceLocator::LocateGlobals().pRender->TriggerFontChange(ServiceLocator::LocateGlobals().dpi,
GetDesiredFont(),
GetCurrentFont());
auto& globals = ServiceLocator::LocateGlobals();
const auto& gci = globals.getConsoleInformation();
NotifyGlyphWidthFontChanged();
// Hand the handle to our internal structure to the font change trigger in case it updates it based on what's appropriate.
if (globals.pRender != nullptr)
{
globals.pRender->TriggerFontChange(globals.dpi, GetDesiredFont(), GetCurrentFont());
}
TextMeasurementMode mode;
switch (gci.GetTextMeasurementMode())
{
case SettingsTextMeasurementMode::Wcswidth:
mode = TextMeasurementMode::Wcswidth;
break;
case SettingsTextMeasurementMode::Console:
mode = TextMeasurementMode::Console;
break;
default:
mode = TextMeasurementMode::Graphemes;
break;
}
CodepointWidthDetector::Singleton().Reset(mode);
}
}
@ -2466,7 +2470,6 @@ Viewport SCREEN_INFORMATION::GetVirtualViewport() const noexcept
// Method Description:
// - Returns true if the character at the cursor's current position is wide.
// See IsGlyphFullWidth
// Arguments:
// - <none>
// Return Value:

View File

@ -777,6 +777,16 @@ bool Settings::GetCopyColor() const noexcept
return _fCopyColor;
}
// Returns the text measurement mode currently stored in these settings (_textMeasurement).
SettingsTextMeasurementMode Settings::GetTextMeasurementMode() const noexcept
{
return _textMeasurement;
}
// Stores the given text measurement mode in these settings (_textMeasurement).
// Arguments:
// - mode: the mode to remember; it is stored as-is, nothing else is updated here.
void Settings::SetTextMeasurementMode(const SettingsTextMeasurementMode mode) noexcept
{
_textMeasurement = mode;
}
bool Settings::GetEnableBuiltinGlyphs() const noexcept
{
return _fEnableBuiltinGlyphs;

View File

@ -24,6 +24,13 @@ constexpr unsigned short MIN_WINDOW_OPACITY = 0x4D; // 0x4D is approximately 30%
#include "ConsoleArguments.hpp"
#include "../renderer/inc/RenderSettings.hpp"
// The text measurement modes that can be selected through the settings.
// The underlying type is DWORD; the numeric value of each enumerator is
// spelled out so that it is visible at a glance and cannot shift by accident
// if an enumerator is ever inserted or reordered.
enum class SettingsTextMeasurementMode : DWORD
{
    // Measure text in grapheme clusters.
    Graphemes = 0,
    // Measure text wcswidth-style.
    Wcswidth = 1,
    // Measure text the way the old console host did.
    Console = 2,
};
class Settings
{
using RenderSettings = Microsoft::Console::Render::RenderSettings;
@ -171,6 +178,8 @@ public:
bool GetUseDx() const noexcept;
bool GetCopyColor() const noexcept;
SettingsTextMeasurementMode GetTextMeasurementMode() const noexcept;
void SetTextMeasurementMode(SettingsTextMeasurementMode mode) noexcept;
bool GetEnableBuiltinGlyphs() const noexcept;
private:
@ -213,6 +222,7 @@ private:
std::wstring _LaunchFaceName;
bool _fAllowAltF4Close;
DWORD _dwVirtTermLevel;
SettingsTextMeasurementMode _textMeasurement = SettingsTextMeasurementMode::Graphemes;
bool _fUseDx;
bool _fCopyColor;
bool _fEnableBuiltinGlyphs = true;

View File

@ -2,33 +2,18 @@
// Licensed under the MIT license.
#include "precomp.h"
#include "srvinit.h"
#include "dbcs.h"
#include "handle.h"
#include "registry.hpp"
#include "renderFontDefaults.hpp"
#include "ApiRoutines.h"
#include "../types/inc/GlyphWidth.hpp"
#include "../server/DeviceHandle.h"
#include "../server/Entrypoints.h"
#include "../server/IoSorter.h"
#include "../interactivity/inc/ISystemConfigurationProvider.hpp"
#include "../interactivity/inc/ServiceLocator.hpp"
#include "../interactivity/base/ApiDetector.hpp"
#include "../interactivity/base/RemoteConsoleControl.hpp"
#include "renderData.hpp"
#include "../renderer/base/renderer.hpp"
#include "../inc/conint.h"
#include "tracing.hpp"
#include "../interactivity/inc/ServiceLocator.hpp"
#include "../server/DeviceHandle.h"
#include "../server/IoSorter.h"
#include "../types/inc/CodepointWidthDetector.hpp"
#if TIL_FEATURE_RECEIVEINCOMINGHANDOFF_ENABLED
#include "ITerminalHandoff.h"
@ -882,8 +867,9 @@ PWSTR TranslateConsoleTitle(_In_ PCWSTR pwszConsoleTitle, const BOOL fUnexpand,
// Set up the renderer to be used to calculate the width of a glyph,
// should we be unable to figure out its width another way.
auto pfn = [ObjectPtr = static_cast<Renderer*>(g.pRender)](auto&& PH1) { return ObjectPtr->IsGlyphWideByFont(std::forward<decltype(PH1)>(PH1)); };
SetGlyphWidthFallback(pfn);
CodepointWidthDetector::Singleton().SetFallbackMethod([](const std::wstring_view& glyph) {
return ServiceLocator::LocateGlobals().pRender->IsGlyphWideByFont(glyph);
});
}
catch (...)
{

View File

@ -1,89 +0,0 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#include "precomp.h"
#include "WexTestClass.h"
#include "../../inc/consoletaeftemplates.hpp"
#include "CommonState.hpp"
#include "../types/inc/CodepointWidthDetector.hpp"
using namespace WEX::Logging;
static constexpr std::wstring_view emoji = L"\xD83E\xDD22"; // U+1F922 nauseated face
static constexpr std::wstring_view ambiguous = L"\x414"; // U+0414 cyrillic capital de
// codepoint and utf16 encoded string
static const std::vector<std::tuple<unsigned int, std::wstring, CodepointWidth>> testData = {
{ 0x7, L"\a", CodepointWidth::Narrow }, // BEL
{ 0x20, L" ", CodepointWidth::Narrow },
{ 0x39, L"9", CodepointWidth::Narrow },
{ 0x414, L"\x414", CodepointWidth::Narrow }, // U+0414 cyrillic capital de
{ 0x1104, L"\x1104", CodepointWidth::Wide }, // U+1104 hangul choseong ssangtikeut
{ 0x306A, L"\x306A", CodepointWidth::Wide }, // U+306A hiragana na
{ 0x30CA, L"\x30CA", CodepointWidth::Wide }, // U+30CA katakana na
{ 0x72D7, L"\x72D7", CodepointWidth::Wide }, // U+72D7
{ 0x1F47E, L"\xD83D\xDC7E", CodepointWidth::Wide }, // U+1F47E alien monster
{ 0x1F51C, L"\xD83D\xDD1C", CodepointWidth::Wide } // U+1F51C SOON
};
class CodepointWidthDetectorTests
{
TEST_CLASS(CodepointWidthDetectorTests);
TEST_METHOD(CanLookUpEmoji)
{
CodepointWidthDetector widthDetector;
VERIFY_IS_TRUE(widthDetector.IsWide(emoji));
}
TEST_METHOD(CanGetWidths)
{
CodepointWidthDetector widthDetector;
for (const auto& data : testData)
{
const auto& expected = std::get<2>(data);
const auto& wstr = std::get<1>(data);
const auto result = widthDetector.GetWidth({ wstr.c_str(), wstr.size() });
VERIFY_ARE_EQUAL(result, expected);
}
}
static bool FallbackMethod(const std::wstring_view glyph)
{
if (glyph.size() < 1)
{
return false;
}
else
{
return (glyph.at(0) % 2) == 1;
}
}
TEST_METHOD(AmbiguousCache)
{
// Set up a detector with fallback.
CodepointWidthDetector widthDetector;
widthDetector.SetFallbackMethod(std::bind(&FallbackMethod, std::placeholders::_1));
// Ensure fallback cache is empty.
VERIFY_ARE_EQUAL(0u, widthDetector._fallbackCache.size());
// Lookup ambiguous width character.
widthDetector.IsWide(ambiguous);
// Cache should hold it.
VERIFY_ARE_EQUAL(1u, widthDetector._fallbackCache.size());
// Cached item should match what we expect
const auto it = widthDetector._fallbackCache.begin();
VERIFY_ARE_EQUAL(ambiguous[0], it->first);
VERIFY_ARE_EQUAL(FallbackMethod(ambiguous) ? 2u : 1u, it->second);
// Cache should empty when font changes.
widthDetector.NotifyFontChanged();
VERIFY_ARE_EQUAL(0u, widthDetector._fallbackCache.size());
}
};

View File

@ -15,7 +15,6 @@
<ClCompile Include="ApiRoutinesTests.cpp" />
<ClCompile Include="ClipboardTests.cpp" />
<ClCompile Include="ConsoleArgumentsTests.cpp" />
<ClCompile Include="CodepointWidthDetectorTests.cpp" />
<ClCompile Include="DbcsTests.cpp" />
<ClCompile Include="HistoryTests.cpp" />
<ClCompile Include="InitTests.cpp" />

View File

@ -69,9 +69,6 @@
<ClCompile Include="HistoryTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="CodepointWidthDetectorTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="TextBufferIteratorTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -20,7 +20,6 @@ SOURCES = \
HistoryTests.cpp \
UtilsTests.cpp \
ConsoleArgumentsTests.cpp \
CodepointWidthDetectorTests.cpp \
DbcsTests.cpp \
ScreenBufferTests.cpp \
TextBufferIteratorTests.cpp \

View File

@ -23,7 +23,19 @@
#endif
#endif
#define PSEUDOCONSOLE_RESIZE_QUIRK (2u)
// CreatePseudoConsole Flags
#ifndef PSEUDOCONSOLE_INHERIT_CURSOR
#define PSEUDOCONSOLE_INHERIT_CURSOR (0x1)
#endif
#ifndef PSEUDOCONSOLE_RESIZE_QUIRK
#define PSEUDOCONSOLE_RESIZE_QUIRK (0x2)
#endif
// Glyph width selection flags.
// PSEUDOCONSOLE_GLYPH_WIDTH__MASK (0x18) covers the two bits (0x08 and 0x10) in which
// the three PSEUDOCONSOLE_GLYPH_WIDTH_* values below are encoded.
// NOTE(review): what a value of 0 within the mask selects is not visible here - confirm against the consumer of dwFlags.
#ifndef PSEUDOCONSOLE_GLYPH_WIDTH__MASK
#define PSEUDOCONSOLE_GLYPH_WIDTH__MASK 0x18
#define PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES 0x08
#define PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH 0x10
#define PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE 0x18
#endif
CONPTY_EXPORT HRESULT WINAPI ConptyCreatePseudoConsole(COORD size, HANDLE hInput, HANDLE hOutput, DWORD dwFlags, HPCON* phPC);
CONPTY_EXPORT HRESULT WINAPI ConptyCreatePseudoConsoleAsUser(HANDLE hToken, COORD size, HANDLE hInput, HANDLE hOutput, DWORD dwFlags, HPCON* phPC);

View File

@ -62,6 +62,7 @@ const RegistrySerialization::_RegPropertyMap RegistrySerialization::s_PropertyMa
{ _RegPropertyType::Boolean, CONSOLE_REGISTRY_TERMINALSCROLLING, SET_FIELD_AND_SIZE(_TerminalScrolling) },
{ _RegPropertyType::Boolean, CONSOLE_REGISTRY_USEDX, SET_FIELD_AND_SIZE(_fUseDx) },
{ _RegPropertyType::Boolean, CONSOLE_REGISTRY_COPYCOLOR, SET_FIELD_AND_SIZE(_fCopyColor) },
{ _RegPropertyType::Dword, L"TextMeasurement", SET_FIELD_AND_SIZE(_textMeasurement) },
#if TIL_FEATURE_CONHOSTATLASENGINE_ENABLED
{ _RegPropertyType::Boolean, L"EnableBuiltinGlyphs", SET_FIELD_AND_SIZE(_fEnableBuiltinGlyphs) },
#endif

View File

@ -545,9 +545,19 @@ namespace Microsoft::Console::VirtualTerminal::DispatchTypes
ALTERNATE_SCROLL = DECPrivateMode(1007),
ASB_AlternateScreenBuffer = DECPrivateMode(1049),
XTERM_BracketedPasteMode = DECPrivateMode(2004),
GCM_GraphemeClusterMode = DECPrivateMode(2027),
W32IM_Win32InputMode = DECPrivateMode(9001),
};
// State codes used when reporting the status of a mode (DECRPM).
// The numeric values are part of the report that is sent back, so they must not be changed.
enum ModeResponses : VTInt
{
DECRPM_Unsupported = 0, // the mode is not supported
DECRPM_Enabled = 1, // the mode is currently enabled
DECRPM_Disabled = 2, // the mode is currently disabled
DECRPM_PermanentlyEnabled = 3, // the mode is enabled permanently
DECRPM_PermanentlyDisabled = 4, // the mode is disabled permanently
};
enum CharacterSets : uint64_t
{
DecSpecialGraphics = VTID("0"),

View File

@ -4,10 +4,11 @@
#include "precomp.h"
#include "adaptDispatch.hpp"
#include "../../renderer/base/renderer.hpp"
#include "../../types/inc/Viewport.hpp"
#include "../../types/inc/utils.hpp"
#include "../../inc/unicode.hpp"
#include "../../renderer/base/renderer.hpp"
#include "../../types/inc/CodepointWidthDetector.hpp"
#include "../../types/inc/utils.hpp"
#include "../../types/inc/Viewport.hpp"
#include "../parser/ascii.hpp"
using namespace Microsoft::Console::Types;
@ -2022,6 +2023,8 @@ bool AdaptDispatch::_ModeParamsHelper(const DispatchTypes::ModeParams param, con
case DispatchTypes::ModeParams::XTERM_BracketedPasteMode:
_api.SetSystemMode(ITerminalApi::Mode::BracketedPaste, enable);
return !_api.IsConsolePty();
case DispatchTypes::ModeParams::GCM_GraphemeClusterMode:
return true;
case DispatchTypes::ModeParams::W32IM_Win32InputMode:
_terminalInput.SetInputMode(TerminalInput::Mode::Win32, enable);
// ConPTY requests the Win32InputMode on startup and disables it on shutdown. When nesting ConPTY inside
@ -2068,116 +2071,124 @@ bool AdaptDispatch::ResetMode(const DispatchTypes::ModeParams param)
// - True if handled successfully. False otherwise.
bool AdaptDispatch::RequestMode(const DispatchTypes::ModeParams param)
{
auto enabled = std::optional<bool>{};
// mapTemp: true -> DECRPM_Enabled, false -> DECRPM_Disabled.
static constexpr auto mapTemp = [](const bool b) { return b ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; };
// mapPerm: true -> DECRPM_PermanentlyEnabled, false -> DECRPM_PermanentlyDisabled.
static constexpr auto mapPerm = [](const bool b) { return b ? DispatchTypes::DECRPM_PermanentlyEnabled : DispatchTypes::DECRPM_PermanentlyDisabled; };
// Any mode that isn't handled by the switch below is reported as unsupported.
VTInt state = DispatchTypes::DECRPM_Unsupported;
switch (param)
{
case DispatchTypes::ModeParams::IRM_InsertReplaceMode:
enabled = _modes.test(Mode::InsertReplace);
state = mapTemp(_modes.test(Mode::InsertReplace));
break;
case DispatchTypes::ModeParams::LNM_LineFeedNewLineMode:
// VT apps expect that the system and input modes are the same, so if
// they become out of sync, we just act as if LNM mode isn't supported.
if (_api.GetSystemMode(ITerminalApi::Mode::LineFeed) == _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed))
{
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::LineFeed));
}
break;
case DispatchTypes::ModeParams::DECCKM_CursorKeysMode:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::CursorKey);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::CursorKey));
break;
case DispatchTypes::ModeParams::DECANM_AnsiMode:
enabled = _api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi);
state = mapTemp(_api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi));
break;
case DispatchTypes::ModeParams::DECCOLM_SetNumberOfColumns:
// DECCOLM is not supported in conpty mode
if (!_api.IsConsolePty())
{
enabled = _modes.test(Mode::Column);
state = mapTemp(_modes.test(Mode::Column));
}
break;
case DispatchTypes::ModeParams::DECSCNM_ScreenMode:
enabled = _renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed);
state = mapTemp(_renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed));
break;
case DispatchTypes::ModeParams::DECOM_OriginMode:
enabled = _modes.test(Mode::Origin);
state = mapTemp(_modes.test(Mode::Origin));
break;
case DispatchTypes::ModeParams::DECAWM_AutoWrapMode:
enabled = _api.GetSystemMode(ITerminalApi::Mode::AutoWrap);
state = mapTemp(_api.GetSystemMode(ITerminalApi::Mode::AutoWrap));
break;
case DispatchTypes::ModeParams::DECARM_AutoRepeatMode:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat));
break;
case DispatchTypes::ModeParams::ATT610_StartCursorBlink:
enabled = _pages.ActivePage().Cursor().IsBlinkingAllowed();
state = mapTemp(_pages.ActivePage().Cursor().IsBlinkingAllowed());
break;
case DispatchTypes::ModeParams::DECTCEM_TextCursorEnableMode:
enabled = _pages.ActivePage().Cursor().IsVisible();
state = mapTemp(_pages.ActivePage().Cursor().IsVisible());
break;
case DispatchTypes::ModeParams::XTERM_EnableDECCOLMSupport:
// DECCOLM is not supported in conpty mode
if (!_api.IsConsolePty())
{
enabled = _modes.test(Mode::AllowDECCOLM);
state = mapTemp(_modes.test(Mode::AllowDECCOLM));
}
break;
case DispatchTypes::ModeParams::DECPCCM_PageCursorCouplingMode:
enabled = _modes.test(Mode::PageCursorCoupling);
state = mapTemp(_modes.test(Mode::PageCursorCoupling));
break;
case DispatchTypes::ModeParams::DECNKM_NumericKeypadMode:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::Keypad);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::Keypad));
break;
case DispatchTypes::ModeParams::DECBKM_BackarrowKeyMode:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey));
break;
case DispatchTypes::ModeParams::DECLRMM_LeftRightMarginMode:
enabled = _modes.test(Mode::AllowDECSLRM);
state = mapTemp(_modes.test(Mode::AllowDECSLRM));
break;
case DispatchTypes::ModeParams::DECECM_EraseColorMode:
enabled = _modes.test(Mode::EraseColor);
state = mapTemp(_modes.test(Mode::EraseColor));
break;
case DispatchTypes::ModeParams::VT200_MOUSE_MODE:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking));
break;
case DispatchTypes::ModeParams::BUTTON_EVENT_MOUSE_MODE:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking));
break;
case DispatchTypes::ModeParams::ANY_EVENT_MOUSE_MODE:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking));
break;
case DispatchTypes::ModeParams::UTF8_EXTENDED_MODE:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding));
break;
case DispatchTypes::ModeParams::SGR_EXTENDED_MODE:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding));
break;
case DispatchTypes::ModeParams::FOCUS_EVENT_MODE:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent));
break;
case DispatchTypes::ModeParams::ALTERNATE_SCROLL:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll));
break;
case DispatchTypes::ModeParams::ASB_AlternateScreenBuffer:
enabled = _usingAltBuffer;
state = mapTemp(_usingAltBuffer);
break;
case DispatchTypes::ModeParams::XTERM_BracketedPasteMode:
enabled = _api.GetSystemMode(ITerminalApi::Mode::BracketedPaste);
state = mapTemp(_api.GetSystemMode(ITerminalApi::Mode::BracketedPaste));
break;
case DispatchTypes::ModeParams::GCM_GraphemeClusterMode:
// Reported as a permanent state: permanently enabled when the width detector
// measures in Graphemes mode, permanently disabled for any other mode.
state = mapPerm(CodepointWidthDetector::Singleton().GetMode() == TextMeasurementMode::Graphemes);
break;
case DispatchTypes::ModeParams::W32IM_Win32InputMode:
enabled = _terminalInput.GetInputMode(TerminalInput::Mode::Win32);
state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::Win32));
break;
default:
enabled = std::nullopt;
break;
}
// 1 indicates the mode is enabled, 2 it's disabled, and 0 it's unsupported
const auto state = enabled.has_value() ? (enabled.value() ? 1 : 2) : 0;
const auto isPrivate = param >= DispatchTypes::DECPrivateMode(0);
const auto prefix = isPrivate ? L"?" : L"";
const auto mode = isPrivate ? param - DispatchTypes::DECPrivateMode(0) : param;
const auto response = wil::str_printf<std::wstring>(L"\x1b[%s%d;%d$y", prefix, mode, state);
_api.ReturnResponse(response);
// Private modes are encoded at or above DECPrivateMode(0): strip that offset
// to recover the plain mode number and report it with a "?" prefix.
VTInt mode = param;
std::wstring_view prefix;
if (mode >= DispatchTypes::DECPrivateMode(0))
{
mode -= DispatchTypes::DECPrivateMode(0);
prefix = L"?";
}
// The reply has the form: ESC [ <prefix> <mode> ; <state> $ y
_api.ReturnResponse(fmt::format(FMT_COMPILE(L"\x1b[{}{};{}$y"), prefix, mode, state));
return true;
}

View File

@ -2059,6 +2059,25 @@ public:
_testGetSet->ValidateInputEvent(expectedResponse);
}
// Verifies that a mode which is reported as "permanently enabled" answers a mode
// request with state 3 (the ";3$y" suffix), even after an attempt to reset it.
// The test is data-driven; the only mode number supplied is 2027.
// NOTE(review): the expected state 3 presumably relies on the width detector being in
// its Graphemes mode while the test runs - confirm that nothing else changes it.
TEST_METHOD(RequestPermanentModeTests)
{
BEGIN_TEST_METHOD_PROPERTIES()
TEST_METHOD_PROPERTY(L"Data:modeNumber", L"{2027}")
END_TEST_METHOD_PROPERTIES()
VTInt modeNumber;
VERIFY_SUCCEEDED_RETURN(TestData::TryGetValue(L"modeNumber", modeNumber));
// The data row holds the plain number; turn it into the private-mode parameter.
const auto mode = DispatchTypes::DECPrivateMode(modeNumber);
_testGetSet->PrepData();
VERIFY_IS_TRUE(_pDispatch->ResetMode(mode)); // as a test to ensure that it stays permanently enabled (= 3)
VERIFY_IS_TRUE(_pDispatch->RequestMode(mode));
// Expected reply: ESC [ ? <modeNumber> ; 3 $ y
wchar_t expectedResponse[20];
swprintf_s(expectedResponse, ARRAYSIZE(expectedResponse), L"\x1b[?%d;3$y", modeNumber);
_testGetSet->ValidateInputEvent(expectedResponse);
}
TEST_METHOD(RequestChecksumReportTests)
{
const auto requestChecksumReport = [this](const auto length) {

View File

@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@ -0,0 +1,575 @@
using System.Text;
using System.Runtime.InteropServices;
using System.Numerics;
using System.Xml.Linq;
using TrieType = uint;
// JoinRules doesn't quite follow UAX #29, as it states:
// > Note: Testing two adjacent characters is insufficient for determining a boundary.
//
// I completely agree, however it makes the implementation complex and slow, and it only benefits what can be considered
// edge cases in the context of terminals. By using a lookup table anyway this results in a >100MB/s throughput,
// before adding any fast-passes whatsoever. This is 2x as fast as any standards conforming implementation I found.
//
// This affects the following rules:
// * GB9c: \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} [\p{InCB=Extend}\p{InCB=Linker}]* × \p{InCB=Consonant}
// "Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker."
// Our implementation does this:
// × \p{InCB=Linker}
// \p{InCB=Linker} × \p{InCB=Consonant}
// In other words, it doesn't check for a leading \p{InCB=Consonant} or a series of Extenders/Linkers in between.
// I suspect that these simplified rules are sufficient for the vast majority of terminal use cases.
// * GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
// "Do not break within emoji modifier sequences or emoji zwj sequences."
// Our implementation does this:
// ZWJ × \p{Extended_Pictographic}
// In other words, it doesn't check whether the ZWJ is preceded by another \p{Extended_Pictographic}.
// Again, I suspect that a trailing, standalone ZWJ is a rare occurrence and joining it with any Emoji is fine.
// * GB12: sot (RI RI)* RI × RI
// GB13: [^RI] (RI RI)* RI × RI
// "Do not break within emoji flag sequences. That is, do not break between regional indicator
// (RI) symbols if there is an odd number of RI characters before the break point."
// Our implementation does this (this is not a real notation):
// RI ÷ RI × RI ÷ RI
// In other words, it joins any pair of RIs and then immediately aborts further RI joins.
// Unlike the above two cases, this is a bit more risky, because it's much more likely to be encountered in practice.
// Imagine a shell that doesn't understand graphemes for instance. You type 2 flags (= 4 RIs) and backspace.
// You'll now have 3 RIs. If iterating through it forwards, you'd join the first two, then get 1 lone RI at the end,
// whereas if you iterate backwards you'd join the last two, then get 1 lone RI at the start.
// This asymmetry may have some subtle effects, but I suspect that it's still rare enough to not matter much.
//
// This is a great reference for the resulting table:
// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html
byte[][][] joinRules =
[
// Base table
[
/* | leading -> trailing codepoint */
/* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */
/* cbOther | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbControl | */ [3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */],
/* cbExtend | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbRI | */ [3 /* | */, 3 /* | */, 0 /* | */, 1 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbPrepend | */ [0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */],
/* cbHangulL | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulLV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulLVT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbInCBLinker | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */],
/* cbInCBConsonant | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbExtPic | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbZWJ | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */],
],
// Once we have encountered a Regional Indicator pair we'll enter this table.
// It's a copy of the base table, but further Regional Indicator joins are forbidden.
[
/* | leading -> trailing codepoint */
/* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */
/* cbOther | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbControl | */ [3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */],
/* cbExtend | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbRI | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbPrepend | */ [0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */],
/* cbHangulL | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulLV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbHangulLVT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbInCBLinker | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */],
/* cbInCBConsonant | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbExtPic | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */],
/* cbZWJ | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */],
]
];
// Exactly one argument is expected: the path to ucd.nounihan.grouped.xml.
if (args.Length != 1)
{
    // The usage text goes to stderr, because stdout carries the generated C++ source
    // and is typically redirected into a file. Printing the help text to stdout would
    // silently end up in that file instead of in front of the user.
    Console.Error.WriteLine(
        """
        Usage: GraphemeTableGen <path to ucd.nounihan.grouped.xml>
        You can download the latest ucd.nounihan.grouped.xml from:
        https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip
        """
    );
    // A non-zero exit code signals the incorrect invocation to the caller.
    Environment.Exit(1);
}
// Read the per-codepoint values from the UCD XML file given on the command line.
var ucd = ExtractValuesFromUcd(args[0]);

// Search the block sizes 2^2 through 2^8 for the best 4-stage trie configuration.
// More stages mean less size: the trajectory roughly follows a+b*c^stages, where c < 1.
// 4 stages still save ~30% over 3 stages, while going beyond 5 gives diminishing returns (<10%).
var trie = BuildBestTrie(ucd.Values, 2, 8, 4);

// joinRules stores 2 bits per value; pack them into 32-bit integers to save space.
var rules = PrepareRulesTable(joinRules);

// Every rules item has the same length and every entry occupies 32 bits = 4 bytes.
var totalSize = trie.TotalSize + rules.Length * rules[0].Length * sizeof(TrieType);

// Quick sanity check: looking up each codepoint through the trie
// must reproduce exactly the value the trie was built from.
foreach (var (want, codepoint) in ucd.Values.Select((value, index) => (value, index)))
{
    // Walk the stages in order; the value found in one stage is the base offset into the next.
    var got = trie.Stages.Aggregate(
        (TrieType)0,
        (offset, stage) => stage.Values[(int)offset + ((codepoint >> stage.Shift) & stage.Mask)]);

    if (got == want)
    {
        continue;
    }

    throw new Exception($"trie sanity check failed for {codepoint:X}");
}
// All the remaining code starting here simply generates the C++ output.
var buf = new StringBuilder();
// File header: generator name, UTC timestamp, the UCD description and the total table size.
buf.Append("// Generated by GraphemeTableGen\n");
buf.Append($"// on {DateTime.UtcNow.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ssK")}, from {ucd.Description}, {totalSize} bytes\n");
buf.Append("// clang-format off\n");
// Emit one `static constexpr` array per trie stage.
foreach (var stage in trie.Stages)
{
// Each value is printed as a hex literal, zero-padded to (Bits / 4) digits.
var fmt = $" 0x{{0:x{stage.Bits / 4}}},";
// Values per output line: 16 for stage 0, otherwise (Mask + 1) for the stage.
var width = 16;
if (stage.Index != 0)
{
width = stage.Mask + 1;
}
buf.Append($"static constexpr uint{stage.Bits}_t s_stage{stage.Index}[] = {{");
foreach (var (value, j) in stage.Values.Select((v, j) => (v, j)))
{
// Start a new output line every `width` values.
if (j % width == 0)
{
buf.Append("\n ");
}
buf.AppendFormat(fmt, value);
}
buf.Append("\n};\n");
}
// Emit the packed join rules: one inner array per table, each entry as a 32-digit binary literal.
buf.Append($"static constexpr uint32_t s_joinRules[{rules.Length}][{rules[0].Length}] = {{\n");
foreach (var table in rules)
{
buf.Append(" {\n");
foreach (var r in table)
{
buf.Append($" 0b{r:b32},\n");
}
buf.Append(" },\n");
}
buf.Append("};\n");
// Emit ucdLookup(): stage 0 is indexed by the shifted codepoint alone; every later stage is
// indexed by the previous stage's value plus the shifted and masked codepoint.
// Its return type matches the bit width of the last stage.
buf.Append($"constexpr uint{trie.Stages[^1].Bits}_t ucdLookup(const char32_t cp) noexcept\n");
buf.Append("{\n");
foreach (var stage in trie.Stages)
{
buf.Append($" const auto s{stage.Index} = s_stage{stage.Index}[");
if (stage.Index == 0)
{
buf.Append($"cp >> {stage.Shift}");
}
else
{
buf.Append($"s{stage.Index - 1} + ((cp >> {stage.Shift}) & {stage.Mask})");
}
buf.Append("];\n");
}
buf.Append($" return s{trie.Stages.Count - 1};\n");
buf.Append("}\n");
// Emit ucdGraphemeJoins(): only the low 4 bits of `lead` and `trail` are used; `lead` selects
// the 32-bit row of the table for `state`, and `trail` selects the 2-bit entry within that row.
buf.Append("constexpr int ucdGraphemeJoins(const int state, const int lead, const int trail) noexcept\n");
buf.Append("{\n");
buf.Append(" const auto l = lead & 15;\n");
buf.Append(" const auto t = trail & 15;\n");
buf.Append($" return (s_joinRules[state][l] >> (t * 2)) & 3;\n");
buf.Append("}\n");
// Emit ucdGraphemeDone(): true when the given state equals 3.
buf.Append("constexpr bool ucdGraphemeDone(const int state) noexcept\n");
buf.Append("{\n");
buf.Append($" return state == 3;\n");
buf.Append("}\n");
// Emit ucdToCharacterWidth(): returns the given value shifted right by 6 bits.
buf.Append("constexpr int ucdToCharacterWidth(const int val) noexcept\n");
buf.Append("{\n");
buf.Append(" return val >> 6;\n");
buf.Append("}\n");
buf.Append("// clang-format on\n");
// The generated source is written to stdout.
Console.Write(buf);
// This reads the given ucd.nounihan.grouped.xml file and extracts the
// CharacterWidth and ClusterBreak properties for all codepoints.
//
// path: location of the UCD XML file, loaded via XDocument.Load.
// Returns a Ucd holding the text of the file's <description> element and one packed
// TrieValue per codepoint (0x110000 = 1114112 entries in total).
// Throws an Exception if a codepoint carries a GCB, InCB or ea value that isn't handled below,
// or if an Extended_Pictographic codepoint has a GCB other than XX.
static Ucd ExtractValuesFromUcd(string path)
{
// Codepoints that aren't listed in the file keep this default (Other, Narrow).
var values = new TrieType[1114112];
Array.Fill(values, TrieValue(ClusterBreak.Other, CharacterWidth.Narrow));
XNamespace ns = "http://www.unicode.org/ns/2003/ucd/1.0";
var doc = XDocument.Load(path);
var root = doc.Root!;
var description = root.Element(ns + "description")!.Value;
foreach (var group in doc.Root!.Descendants(ns + "group"))
{
// In the grouped format, attributes on the <group> act as defaults for its children.
var groupGeneralCategory = group.Attribute("gc")?.Value;
var groupGraphemeClusterBreak = group.Attribute("GCB")?.Value;
var groupIndicConjunctBreak = group.Attribute("InCB")?.Value;
var groupExtendedPictographic = group.Attribute("ExtPict")?.Value;
var groupEastAsian = group.Attribute("ea")?.Value;
foreach (var ch in group.Elements())
{
// An element describes either a single codepoint ("cp")
// or an inclusive range ("first-cp" to "last-cp").
int firstCp;
int lastCp;
if (ch.Attribute("cp") is { } val)
{
var cp = Convert.ToInt32(val.Value, 16);
firstCp = cp;
lastCp = cp;
}
else
{
firstCp = Convert.ToInt32(ch.Attribute("first-cp")!.Value, 16);
lastCp = Convert.ToInt32(ch.Attribute("last-cp")!.Value, 16);
}
// The element's own attribute wins over the group default. If neither exists the value
// is "", which the switches below reject (only gc and ExtPict tolerate an empty value).
var generalCategory = ch.Attribute("gc")?.Value ?? groupGeneralCategory ?? "";
var graphemeClusterBreak = ch.Attribute("GCB")?.Value ?? groupGraphemeClusterBreak ?? "";
var indicConjunctBreak = ch.Attribute("InCB")?.Value ?? groupIndicConjunctBreak ?? "";
var extendedPictographic = ch.Attribute("ExtPict")?.Value ?? groupExtendedPictographic ?? "";
var eastAsian = ch.Attribute("ea")?.Value ?? groupEastAsian ?? "";
var cb = graphemeClusterBreak switch
{
"XX" => ClusterBreak.Other, // Anything else
// We ignore GB3 which demands that CR × LF do not break apart, because
// * these control characters won't normally reach our text storage
// * otherwise we're in a raw write mode and historically conhost stores them in separate cells
"CR" or "LF" or "CN" => ClusterBreak.Control, // Carriage Return, Line Feed, Control
"EX" or "SM" => ClusterBreak.Extend, // Extend, SpacingMark
"PP" => ClusterBreak.Prepend, // Prepend
"ZWJ" => ClusterBreak.ZWJ, // Zero Width Joiner
"RI" => ClusterBreak.RI, // Regional Indicator
"L" => ClusterBreak.HangulL, // Hangul Syllable Type L
"V" => ClusterBreak.HangulV, // Hangul Syllable Type V
"T" => ClusterBreak.HangulT, // Hangul Syllable Type T
"LV" => ClusterBreak.HangulLV, // Hangul Syllable Type LV
"LVT" => ClusterBreak.HangulLVT, // Hangul Syllable Type LVT
_ => throw new Exception($"Unrecognized GCB {graphemeClusterBreak} for {firstCp} to {lastCp}")
};
if (extendedPictographic == "Y")
{
// Currently every single Extended_Pictographic codepoint happens to be GCB=XX.
// This is fantastic for us because it means we can stuff it into the ClusterBreak enum
// and treat it as an alias of EXTEND, but with the special GB11 properties.
if (cb != ClusterBreak.Other)
{
throw new Exception(
$"Unexpected GCB {graphemeClusterBreak} with ExtPict=Y for {firstCp} to {lastCp}");
}
cb = ClusterBreak.ExtPic;
}
// Indic_Conjunct_Break is applied last: Linker and Consonant replace whatever class was
// chosen above, while None and Extend leave it untouched.
cb = indicConjunctBreak switch
{
"None" or "Extend" => cb,
"Linker" => ClusterBreak.InCBLinker,
"Consonant" => ClusterBreak.InCBConsonant,
_ => throw new Exception($"Unrecognized InCB {indicConjunctBreak} for {firstCp} to {lastCp}")
};
var width = eastAsian switch
{
"N" or "Na" or "H" => CharacterWidth.Narrow, // Neutral, Narrow, Half-width
"F" or "W" => CharacterWidth.Wide, // Full-width, Wide
"A" => CharacterWidth.Ambiguous, // Ambiguous
_ => throw new Exception($"Unrecognized ea {eastAsian} for {firstCp} to {lastCp}")
};
// There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches:
// Mc: Mark, spacing combining
// Me: Mark, enclosing
// Mn: Mark, non-spacing
// Cf: Other, format
// This takes precedence over the East Asian Width chosen above.
if (generalCategory.StartsWith("M") || generalCategory == "Cf")
{
width = CharacterWidth.ZeroWidth;
}
Fill(firstCp, lastCp, TrieValue(cb, width));
}
}
// Box-drawing and block elements are ambiguous according to their EastAsian attribute,
// but by convention terminals always consider them to be narrow.
// Note that this overwrites the whole packed value, so the cluster break becomes Other as well.
Fill(0x2500, 0x259F, TrieValue(ClusterBreak.Other, CharacterWidth.Narrow));
return new Ucd
{
Description = description,
Values = values.ToList(),
};
// Assigns value to every codepoint in the inclusive range [first, last].
void Fill(int first, int last, TrieType value)
{
Array.Fill(values, value, first, last - first + 1);
}
}
// Combines both properties into the single integer that the last trie stage stores verbatim:
// the cluster break class occupies the low bits and the width starts at bit 6.
static TrieType TrieValue(ClusterBreak cb, CharacterWidth width)
{
    var breakBits = (byte)cb;
    var widthBits = (byte)width << 6;
    return (TrieType)(breakBits | widthBits);
}
// Every entry of the 2D rule tables only needs 2 of the 8 bits in its byte. To save space this
// squeezes each row (up to 16 entries) into a single 32-bit integer, where the entry at column
// `trail` ends up in bits [trail * 2, trail * 2 + 1]. Each resulting table has exactly 16 rows.
//
// Throws an Exception if a row has more than 16 columns or an entry doesn't fit into 2 bits.
static uint[][] PrepareRulesTable(byte[][][] rules)
{
    var compressed = new uint[rules.Length][];

    for (var prevIndex = 0; prevIndex < rules.Length; prevIndex++)
    {
        var table = rules[prevIndex];
        var packedRows = new uint[16];

        for (var lead = 0; lead < table.Length; lead++)
        {
            var row = table[lead];
            if (row.Length > 16)
            {
                throw new Exception("Can't pack row into 32 bits");
            }

            uint packed = 0;
            for (var trail = 0; trail < row.Length; trail++)
            {
                var nextIndex = row[trail];
                if (nextIndex > 3)
                {
                    throw new Exception("Can't pack table index into 2 bits");
                }

                packed |= (uint)(nextIndex << (trail * 2));
            }

            packedRows[lead] = packed;
        }

        compressed[prevIndex] = packedRows;
    }

    return compressed;
}
// This tries all possible trie configurations and returns the one with the smallest size. It's brute force.
//
// uncompressed: the per-codepoint values to compress.
// minShift/maxShift: inclusive range of chunk size exponents to try for each deduplicated stage.
// stages: total number of trie stages (BuildTrie receives stages-1 shifts).
// Throws ArgumentOutOfRangeException if stages < 1 or maxShift < minShift.
//
// If several configurations are equally small, the one that comes first in the enumeration
// order documented below is returned, so the output is identical from run to run.
static Trie BuildBestTrie(List<TrieType> uncompressed, int minShift, int maxShift, int stages)
{
    if (stages < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(stages), "A trie needs at least one stage");
    }
    if (maxShift < minShift)
    {
        throw new ArgumentOutOfRangeException(nameof(maxShift), "maxShift must not be smaller than minShift");
    }

    var depth = stages - 1;
    var delta = maxShift - minShift + 1;

    // total = delta^depth, the number of shift combinations.
    var total = 1;
    for (var i = 0; i < depth; i++)
    {
        total *= delta;
    }

    var tasks = new int[total][];
    for (var i = 0; i < total; i++)
    {
        // Given minShift=2, maxShift=3, depth=3 this generates
        //   [2 2 2]
        //   [3 2 2]
        //   [2 3 2]
        //   [3 3 2]
        //   [2 2 3]
        //   [3 2 3]
        //   [2 3 3]
        //   [3 3 3]
        var shifts = new int[depth];
        for (int j = 0, index = i; j < depth; j++, index /= delta)
        {
            shifts[j] = minShift + index % delta;
        }

        tasks[i] = shifts;
    }

    // AsOrdered() makes the results arrive in task order. Without it, MinBy (which keeps the first
    // minimum it encounters) would pick a winner among equally sized tries based on thread timing.
    return tasks
        .AsParallel()
        .AsOrdered()
        .Select(shifts => BuildTrie(uncompressed, shifts))
        .MinBy(t => t.TotalSize)!;
}
// Compresses the given uncompressed data into a multi-level trie with shifts.Length+1 stages.
// shifts defines the power-of-two sizes of the deduplicated chunks in each stage.
// The final output receives no deduplication which is why this returns shifts.Length+1 stages.
//
// Each pass splits its input into chunks of 1 << shift items, stores every distinct chunk once
// in `compressed` and records, per chunk, the offset at which it can be found there.
// That list of offsets then becomes the input of the next pass.
static Trie BuildTrie(List<TrieType> uncompressed, Span<int> shifts)
{
var cumulativeShift = 0;
var stages = new List<Stage>();
for (int i = 0; i < shifts.Length; i++)
{
var shift = shifts[i];
var chunkSize = 1 << shift;
// Maps chunk contents to the offset that has already been assigned to an identical chunk.
var cache = new Dictionary<ReadOnlyTrieTypeSpan, TrieType>();
var compressed = new List<TrieType>();
var offsets = new List<TrieType>();
for (var off = 0; off < uncompressed.Count; off += chunkSize)
{
// The last chunk is shorter if the input length isn't a multiple of chunkSize.
var key = new ReadOnlyTrieTypeSpan(uncompressed, off, Math.Min(chunkSize, uncompressed.Count - off));
// ReadOnlyTrieTypeSpan hashes and compares the chunk's contents,
// so identical chunks resolve to the same cache entry.
if (!cache.TryGetValue(key, out var offset))
{
// For a 4-stage trie searching for existing occurrences of chunk in compressed yields a ~10%
// compression improvement. Checking for overlaps with the tail end of compressed yields another ~15%.
// FYI I tried to shuffle the order of compressed chunks but found that this has a negligible impact.
var haystack = CollectionsMarshal.AsSpan(compressed);
var needle = key.AsSpan();
var existing = FindExisting(haystack, needle);
if (existing >= 0)
{
// The chunk already occurs somewhere inside compressed (possibly straddling earlier chunks).
offset = (TrieType)existing;
cache[key] = offset;
}
else
{
// Only append the part of the chunk that isn't already present at the end of compressed.
// Afterwards the chunk occupies the last needle.Length items.
var overlap = MeasureOverlap(CollectionsMarshal.AsSpan(compressed), needle);
compressed.AddRange(needle[overlap..]);
offset = (TrieType)(compressed.Count - needle.Length);
cache[key] = offset;
}
}
offsets.Add(offset);
}
// Stages are created bottom-up: the first pass (i = 0) works on the raw values and gets
// the highest Index. Bits is filled in once all stages exist (see below).
stages.Add(new Stage
{
Values = compressed,
Index = shifts.Length - i,
Shift = cumulativeShift,
Mask = chunkSize - 1,
Bits = 0,
});
uncompressed = offsets;
cumulativeShift += shift;
}
// Whatever offsets remain after the last pass form the top-level stage, which is stored as-is.
stages.Add(new Stage
{
Values = uncompressed,
Index = 0,
Shift = cumulativeShift,
Mask = int.MaxValue,
Bits = 0,
});
// Put the top-level stage (Index 0) first.
stages.Reverse();
// Pick the narrowest of 8, 16 or 32 bits that can hold the largest value of each stage.
foreach (var s in stages)
{
var m = s.Values.Max();
s.Bits = m switch
{
<= 0xff => 8,
<= 0xffff => 16,
_ => 32
};
}
// TotalSize is the combined size of all stages in bytes.
return new Trie
{
Stages = stages,
TotalSize = stages.Sum(s => (s.Bits / 8) * s.Values.Count)
};
}
// Returns the index of the first occurrence of needle within haystack, or -1 if there is none.
static int FindExisting(ReadOnlySpan<TrieType> haystack, ReadOnlySpan<TrieType> needle)
{
    return haystack.IndexOf(needle);
}
// Returns the length of the longest suffix of `prev` that is also a prefix of `next`.
// For example [0,1,2,3,4] and [2,3,4,5] share [2,3,4], so the result is 3.
// If they share nothing the result is 0.
static int MeasureOverlap(ReadOnlySpan<TrieType> prev, ReadOnlySpan<TrieType> next)
{
    // Start with the longest possible overlap and shrink until the two ends match.
    var candidate = Math.Min(prev.Length, next.Length);
    while (candidate > 0)
    {
        var tail = prev.Slice(prev.Length - candidate);
        var head = next.Slice(0, candidate);
        if (tail.SequenceEqual(head))
        {
            return candidate;
        }

        candidate--;
    }

    return 0;
}
// The display width class of a codepoint as derived from its East Asian Width and General Category.
// The numeric value of each member is packed into the trie by TrieValue (shifted left by 6 bits)
// and extracted again by the generated ucdToCharacterWidth helper (val >> 6),
// so reordering the members changes the generated tables.
enum CharacterWidth
{
ZeroWidth,
Narrow,
Wide,
Ambiguous
}
// The grapheme cluster break class of a codepoint. The trailing comments name the
// UAX #29 rules (GBn) that each class takes part in.
// TrieValue stores the numeric value in the bits below the width (which starts at bit 6),
// so the members must stay below 64.
enum ClusterBreak
{
Other, // GB999
Control, // GB3, GB4, GB5 -- includes CR, LF
Extend, // GB9, GB9a -- includes SpacingMark
RI, // GB12, GB13
Prepend, // GB9b
HangulL, // GB6, GB7, GB8
HangulV, // GB6, GB7, GB8
HangulT, // GB6, GB7, GB8
HangulLV, // GB6, GB7, GB8
HangulLVT, // GB6, GB7, GB8
InCBLinker, // GB9c
InCBConsonant, // GB9c
ExtPic, // GB11
ZWJ, // GB9, GB11
}
// The result of ExtractValuesFromUcd.
class Ucd
{
// The text of the <description> element of the UCD XML file.
public required string Description;
// One packed TrieValue per codepoint, indexed by codepoint.
public required List<TrieType> Values;
}
// A single stage (lookup table) of a Trie as produced by BuildTrie.
class Stage
{
// The contents of the table: chunk offsets, or the deduplicated raw values for the last stage.
public required List<TrieType> Values;
// Position of the stage within the trie. 0 is the top-level table of offsets,
// the highest index is the stage holding the raw values.
public required int Index;
// The sum of the shifts of all stages that were built before this one.
public required int Shift;
// Chunk size minus one, or int.MaxValue for the top-level stage.
public required int Mask;
// Width in bits (8, 16 or 32) needed to store the largest item in Values.
public required int Bits;
}
// A complete multi-stage trie as produced by BuildTrie.
class Trie
{
// All stages, starting with the top-level stage (Index 0).
public required List<Stage> Stages;
// The combined size of all stages in bytes.
public required int TotalSize;
}
// Because you can't put a Span<TrieType> into a Dictionary.
// This works around that by simply keeping a reference to the List<TrieType> around.
//
// Two instances are equal if the slices they describe have the same contents, no matter
// which list or offset they refer to. The list must not be modified while instances
// are used as dictionary keys, because the hash code is computed from the contents.
//
// IEquatable<T> is implemented so that Dictionary compares keys via the strongly typed
// Equals overload instead of boxing the struct for Equals(object) on every comparison.
struct ReadOnlyTrieTypeSpan(List<TrieType> list, int start, int length) : IEquatable<ReadOnlyTrieTypeSpan>
{
    // Returns the slice [start, start + length) of the underlying list.
    public ReadOnlySpan<TrieType> AsSpan() => CollectionsMarshal.AsSpan(list).Slice(start, length);

    public bool Equals(ReadOnlyTrieTypeSpan other)
    {
        return AsSpan().SequenceEqual(other.AsSpan());
    }

    public override bool Equals(object? obj)
    {
        return obj is ReadOnlyTrieTypeSpan other && Equals(other);
    }

    public override int GetHashCode()
    {
        HashCode hashCode = default;
        hashCode.AddBytes(MemoryMarshal.AsBytes(AsSpan()));
        return hashCode.ToHashCode();
    }
}

View File

@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Builds a console executable for .NET 8 with implicit global usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@ -0,0 +1,121 @@
using System.Text;
using System.Text.RegularExpressions;
// Downloads the latest GraphemeBreakTest.txt from unicode.org and writes a C++ table
// (s_graphemeBreakTests) to stdout. Each line of the test file has the form
//   ÷ 0020 × 0308 ÷ 0020 ÷  # comment
// where ÷ marks a break opportunity and × marks the absence of one. Every test becomes one
// table entry consisting of the comment followed by one wide string literal per grapheme.
string data;
using (var client = new HttpClient())
{
    var response = await client.GetAsync("https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt");
    response.EnsureSuccessStatusCode();
    data = await response.Content.ReadAsStringAsync();
}

var testString = new StringBuilder();
var scanner = new StringReader(data);
var firstLine = true;

while (await scanner.ReadLineAsync() is { } line)
{
    // Only split at the first '#', so that a '#' inside the comment doesn't truncate it.
    var parts = line.Split('#', 2);
    var test = parts[0].Trim();
    var comment = parts.Length > 1 ? parts[1].Trim() : "";

    if (firstLine)
    {
        firstLine = false;

        // The first line is a comment containing the file name, which in turn contains the Unicode version.
        var re = new Regex(@"^GraphemeBreakTest-(\d+\.\d+\.\d+)\.txt$");
        var m = re.Match(comment);
        if (!m.Success)
        {
            throw new Exception($"Failed to find version number, got: {comment}");
        }

        testString.Append(
            $$"""
            // Generated by GraphemeTestTableGen
            // on {{DateTime.UtcNow.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ssK")}}, from Unicode {{m.Groups[1].Value}}
            struct GraphemeBreakTest
            {
            const wchar_t* comment;
            const wchar_t* graphemes[4];
            };
            static constexpr GraphemeBreakTest s_graphemeBreakTests[] = {
            """
        );
    }

    // Skip lines that aren't tests (blank lines and pure comment lines).
    if (test == "" || comment == "")
    {
        continue;
    }

    var graphemes = test.Split('÷');
    for (var i = 0; i < graphemes.Length; i++)
    {
        graphemes[i] = graphemes[i].Trim();
    }

    testString.Append($" {{ L\"{comment}\"");

    foreach (var g in graphemes)
    {
        // The test starts and ends with ÷, which yields empty leading/trailing items.
        if (string.IsNullOrEmpty(g))
        {
            continue;
        }

        testString.Append(", L\"");

        // C++ hex escapes (\x...) have no maximum length: they consume every hex digit that follows.
        // This tracks whether the previously written codepoint was such an escape, so that a
        // subsequent literal hex digit can be prevented from becoming part of it.
        var afterHexEscape = false;

        var codepoints = g.Split('×');
        foreach (var c in codepoints)
        {
            var i = Convert.ToUInt32(c.Trim(), 16);
            var isHexEscape = false;

            switch (i)
            {
                case 0x07:
                    testString.Append("\\a");
                    break;
                case 0x08:
                    testString.Append("\\b");
                    break;
                case 0x09:
                    testString.Append("\\t");
                    break;
                case 0x0A:
                    testString.Append("\\n");
                    break;
                case 0x0B:
                    testString.Append("\\v");
                    break;
                case 0x0C:
                    testString.Append("\\f");
                    break;
                case 0x0D:
                    testString.Append("\\r");
                    break;
                case 0x22:
                    // A raw " would terminate the string literal.
                    testString.Append("\\\"");
                    break;
                case 0x5C:
                    // A raw \ would start an escape sequence.
                    testString.Append("\\\\");
                    break;
                case (>= 0x30 and <= 0x39) or (>= 0x41 and <= 0x46) or (>= 0x61 and <= 0x66):
                    // 0-9, A-F, a-f: Writing for instance U+0600 U+0062 as "\x0600b" would be parsed
                    // as the single escape \x0600b. Ending the literal and starting a new, adjacent
                    // one (which the compiler concatenates) keeps the two codepoints apart.
                    if (afterHexEscape)
                    {
                        testString.Append("\" L\"");
                    }
                    testString.Append((char)i);
                    break;
                case >= 0x20 and <= 0x7e:
                    testString.Append((char)i);
                    break;
                case <= 0xff:
                    testString.Append($"\\x{i:X2}");
                    isHexEscape = true;
                    break;
                case <= 0xffff:
                    testString.Append($"\\x{i:X4}");
                    isHexEscape = true;
                    break;
                default:
                    testString.Append($"\\U{i:X8}");
                    break;
            }

            afterHexEscape = isHexEscape;
        }

        testString.Append("\"");
    }

    testString.Append(" },\n");
}

testString.Append("};\n");

// The comments contain non-ASCII characters (÷ and ×).
Console.OutputEncoding = System.Text.Encoding.UTF8;
Console.Write(testString);

File diff suppressed because it is too large Load Diff

View File

@ -5,16 +5,14 @@
#include "inc/CodepointWidthDetector.hpp"
#include "inc/GlyphWidth.hpp"
#pragma warning(suppress : 26426)
// TODO GH 2676 - remove warning suppression and decide what to do re: singleton instance of CodepointWidthDetector
static CodepointWidthDetector widthDetector;
// Function Description:
// - determines if the glyph represented by the string of characters should be
// wide or not. See CodepointWidthDetector::IsWide
bool IsGlyphFullWidth(const std::wstring_view& glyph) noexcept
{
return widthDetector.IsWide(glyph);
GraphemeState state;
CodepointWidthDetector::Singleton().GraphemeNext(state, glyph);
return state.width == 2;
}
// Function Description:
@ -24,29 +22,3 @@ bool IsGlyphFullWidth(const wchar_t wch) noexcept
{
return wch < 0x80 ? false : IsGlyphFullWidth({ &wch, 1 });
}
// Function Description:
// - Sets a function that should be used by the global CodepointWidthDetector
// as the fallback mechanism for determining a particular glyph's width,
// should the glyph be an ambiguous width.
// A Terminal could hook in a Renderer's IsGlyphWideByFont method as the
// fallback to ask the renderer for the glyph's width (for example).
// Arguments:
// - pfnFallback - the function to use as the fallback method.
// Return Value:
// - <none>
void SetGlyphWidthFallback(std::function<bool(const std::wstring_view&)> pfnFallback) noexcept
{
widthDetector.SetFallbackMethod(std::move(pfnFallback));
}
// Function Description:
// - Forwards notification about font changing to glyph width detector
// Arguments:
// - <none>
// Return Value:
// - <none>
void NotifyGlyphWidthFontChanged() noexcept
{
widthDetector.NotifyFontChanged();
}

View File

@ -1,37 +1,75 @@
/*++
Copyright (c) Microsoft Corporation
Module Name:
- CodepointWidthDetector.hpp
Abstract:
- Object used to measure the width of a codepoint when it's rendered
Author:
- Austin Diviness (AustDi) 18-May-2018
--*/
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
#include "convert.hpp"
// use to measure the width of a codepoint
class CodepointWidthDetector final
enum class TextMeasurementMode
{
public:
CodepointWidth GetWidth(const std::wstring_view& glyph) noexcept;
bool IsWide(const std::wstring_view& glyph) noexcept;
void SetFallbackMethod(std::function<bool(const std::wstring_view&)> pfnFallback) noexcept;
void NotifyFontChanged() noexcept;
// Uses a method very similar to the official UAX #29 Extended Grapheme Cluster algorithm.
Graphemes,
// wcswidth() is a popular method on UNIX to measure text width. It basically treats
// any zero-width character as a continuation of a preceding non-zero-width character.
Wcswidth,
// The old conhost algorithm is UCS-2 based and assigns a minimum width of 1 to all codepoints.
// It had been extended to support UTF-16 after the introduction of Windows Terminal,
// but retained the behavior of not supporting zero-width characters.
Console,
};
#ifdef UNIT_TESTING
friend class CodepointWidthDetectorTests;
#endif
// Iterator state for CodepointWidthDetector::GraphemeNext/GraphemePrev.
//
// NOTE: The same GraphemeState instance should be passed for a series of GraphemeNext *or* GraphemePrev calls,
// but NOT between GraphemeNext *and* GraphemePrev ("exclusive OR"). They're also not reusable when the
// CodepointWidthDetector's text measurement mode changes. Different functions treat these members differently.
struct GraphemeState
{
// These are the [out] parameters for GraphemeNext/Prev.
//
// If a previous call returned false (= reached the end of the string), then on the first call with
// the next string, beg/len will contain the parts of the grapheme cluster that are found in that
// new string argument. That's true even if the two strings don't join to form a single cluster.
// In that case beg/len will simply be an empty string. It basically tells you
// "Yup, that cluster in the last string was complete after all".
//
// However, width will always be updated to represent the width of the current cluster.
//
// For instance, if the first string is a narrow emoji and the second one is U+FE0F, the first call will return
// the emoji with a width of 1, and the second call will return U+FE0F with a width of 2.
// You know these two belong together because the first call returned false.
// The total width is not 1+2 but rather just 2.
const wchar_t* beg = nullptr;
int len = 0;
// width will always be between 0 and 2.
int width = 0;
// If GraphemeNext/Prev return false (= reached the end of the string), they'll fill these struct
// members with some info so that we can check if it joins with the start of the next string argument.
// _state is stored ~flipped, so that we can differentiate between it being unset (0) and it being set to 0 (~0 = 255).
// NOTE(review): _state is an int, for which ~0 is -1 and not 255. The 255 presumably
// refers to an 8-bit representation -- confirm against the implementation.
int _state = 0;
int _last = 0;
};
struct CodepointWidthDetector
{
static CodepointWidthDetector& Singleton() noexcept;
// Returns false if the end of the string has been reached.
bool GraphemeNext(GraphemeState& s, const std::wstring_view& str) noexcept;
bool GraphemePrev(GraphemeState& s, const std::wstring_view& str) noexcept;
TextMeasurementMode GetMode() const noexcept;
void SetFallbackMethod(std::function<bool(const std::wstring_view&)> pfnFallback) noexcept;
void Reset(TextMeasurementMode mode) noexcept;
private:
uint8_t _lookupGlyphWidth(char32_t codepoint, const std::wstring_view& glyph) noexcept;
uint8_t _checkFallbackViaCache(char32_t codepoint, const std::wstring_view& glyph) noexcept;
bool _graphemeNext(GraphemeState& s, const std::wstring_view& str) const noexcept;
bool _graphemePrev(GraphemeState& s, const std::wstring_view& str) const noexcept;
bool _graphemeNextWcswidth(GraphemeState& s, const std::wstring_view& str) const noexcept;
bool _graphemePrevWcswidth(GraphemeState& s, const std::wstring_view& str) const noexcept;
bool _graphemeNextConsole(GraphemeState& s, const std::wstring_view& str) noexcept;
bool _graphemePrevConsole(GraphemeState& s, const std::wstring_view& str) noexcept;
__declspec(noinline) int _checkFallbackViaCache(char32_t codepoint) noexcept;
std::unordered_map<char32_t, uint8_t> _fallbackCache;
std::unordered_map<char32_t, int> _fallbackCache;
std::function<bool(const std::wstring_view&)> _pfnFallbackMethod;
TextMeasurementMode _mode = TextMeasurementMode::Graphemes;
int _ambiguousWidth = 1;
};

View File

@ -10,12 +10,5 @@ Abstract:
*/
#pragma once
#include <functional>
#include <string_view>
#include "convert.hpp"
bool IsGlyphFullWidth(const std::wstring_view& glyph) noexcept;
bool IsGlyphFullWidth(const wchar_t wch) noexcept;
void SetGlyphWidthFallback(std::function<bool(const std::wstring_view&)> pfnFallback) noexcept;
void NotifyGlyphWidthFontChanged() noexcept;

View File

@ -30,9 +30,6 @@
<ClCompile Include="..\precomp.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\CodepointWidthDetector.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\GlyphWidth.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -60,6 +57,9 @@
<ClCompile Include="..\TermControlUiaTextRange.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\CodepointWidthDetector.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\inc\IInputEvent.hpp">
@ -122,5 +122,6 @@
</ItemGroup>
<ItemGroup>
<Natvis Include="$(SolutionDir)tools\ConsoleTypes.natvis" />
<Natvis Include="$(MSBuildThisFileDirectory)..\..\natvis\wil.natvis" />
</ItemGroup>
</Project>
</Project>

File diff suppressed because it is too large Load Diff

View File

@ -11,6 +11,7 @@
<Import Project="$(SolutionDir)src\common.build.pre.props" />
<Import Project="$(SolutionDir)\src\common.nugetversions.props" />
<ItemGroup>
<ClCompile Include="CodepointWidthDetectorTests.cpp" />
<ClCompile Include="UtilsTests.cpp" />
<ClCompile Include="UuidTests.cpp" />
<ClCompile Include="..\precomp.cpp">

View File

@ -14,6 +14,7 @@ DLLDEF =
SOURCES = \
$(SOURCES) \
CodepointWidthDetectorTests.cpp \
UuidTests.cpp \
UtilsTests.cpp \
DefaultResource.rc \

View File

@ -137,12 +137,31 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken,
wchar_t cmd[MAX_PATH]{};
const BOOL bInheritCursor = (dwFlags & PSEUDOCONSOLE_INHERIT_CURSOR) == PSEUDOCONSOLE_INHERIT_CURSOR;
const BOOL bResizeQuirk = (dwFlags & PSEUDOCONSOLE_RESIZE_QUIRK) == PSEUDOCONSOLE_RESIZE_QUIRK;
const wchar_t* textMeasurement;
switch (dwFlags & PSEUDOCONSOLE_GLYPH_WIDTH__MASK)
{
case PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES:
textMeasurement = L"--textMeasurement graphemes ";
break;
case PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH:
textMeasurement = L"--textMeasurement wcswidth ";
break;
case PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE:
textMeasurement = L"--textMeasurement console ";
break;
default:
textMeasurement = L"";
break;
}
swprintf_s(cmd,
MAX_PATH,
L"\"%s\" --headless %s%s--width %hd --height %hd --signal 0x%tx --server 0x%tx",
L"\"%s\" --headless %s%s%s--width %hd --height %hd --signal 0x%tx --server 0x%tx",
_ConsoleHostPath(),
bInheritCursor ? L"--inheritcursor " : L"",
bResizeQuirk ? L"--resizeQuirk " : L"",
textMeasurement,
size.X,
size.Y,
std::bit_cast<uintptr_t>(signalPipeConhostSide.get()),

View File

@ -55,8 +55,11 @@ typedef struct _PseudoConsole
#ifndef PSEUDOCONSOLE_RESIZE_QUIRK
#define PSEUDOCONSOLE_RESIZE_QUIRK (0x2)
#endif
#ifndef PSEUDOCONSOLE_WIN32_INPUT_MODE
#define PSEUDOCONSOLE_WIN32_INPUT_MODE (0x4)
#ifndef PSEUDOCONSOLE_GLYPH_WIDTH__MASK
#define PSEUDOCONSOLE_GLYPH_WIDTH__MASK 0x18
#define PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES 0x08
#define PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH 0x10
#define PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE 0x18
#endif
// Implementations of the various PseudoConsole functions.