From 2c452e0fd6f23fe7a6892c17998b39d12644f26f Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 7 Aug 2024 22:19:11 +0200 Subject: [PATCH] Remove IsGlyphFullWidth from InputBuffer (#17680) In several places the old conhost codebase appears to assume that any wide glyph is represented by two codepoints. This is probably an artifact of the ASCII/DBCS split that conhost used to have. When conhost got merged into a single UCS2-aware application, this artifact was apparently never properly resolved. To my knowledge there are at least two places where this assumption exists: The clipboard code which translates non-wide non-ascii characters to Alt-numpad sequences, and this code. Both are wrong. This is because in a Unicode-context there's no correlation between the number of codepoints and the width of the glyph, even with UCS2. In a post-UCS2-world the correct check is for surrogate pairs, as they must be avoided for the same reason DBCS were avoided. One could consider this a breaking change of the API, as this can now result in repeat counts >1 for wide glyphs. If someone complained about this change in behavior, I'd probably not change it back, as narrow complex Unicode characters exist too. --- src/host/inputBuffer.cpp | 11 +++-------- src/host/ut_host/InputBufferTests.cpp | 19 ++++++++----------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/host/inputBuffer.cpp b/src/host/inputBuffer.cpp index a843a01082..e943fa9ac7 100644 --- a/src/host/inputBuffer.cpp +++ b/src/host/inputBuffer.cpp @@ -10,7 +10,6 @@ #include "misc.h" #include "stream.h" #include "../interactivity/inc/ServiceLocator.hpp" -#include "../types/inc/GlyphWidth.hpp" #define INPUT_BUFFER_DEFAULT_INPUT_MODE (ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT | ENABLE_MOUSE_INPUT) @@ -804,13 +803,9 @@ bool InputBuffer::_CoalesceEvent(const INPUT_RECORD& inEvent) noexcept (lastKey.wVirtualScanCode == inKey.wVirtualScanCode || WI_IsFlagSet(inKey.dwControlKeyState, NLS_IME_CONVERSION)) && lastKey.uChar.UnicodeChar == inKey.uChar.UnicodeChar && lastKey.dwControlKeyState == inKey.dwControlKeyState && - // TODO:GH#8000 This behavior is an import from old conhost v1 and has been broken for decades. - // This is probably the outdated idea that any wide glyph is being represented by 2 characters (DBCS) and likely - // resulted from conhost originally being split into a ASCII/OEM and a DBCS variant with preprocessor flags. - // You can't update the repeat count of such a A,B pair, because they're stored as A,A,B,B (down-down, up-up). - // I believe the proper approach is to store pairs of characters as pairs, update their combined - // repeat count and only when they're being read de-coalesce them into their alternating form. - !IsGlyphFullWidth(inKey.uChar.UnicodeChar)) + // A single repeat count cannot represent two INPUT_RECORDs simultaneously, + // and so it cannot represent a surrogate pair either. + !til::is_surrogate(inKey.uChar.UnicodeChar)) { lastKey.wRepeatCount += inKey.wRepeatCount; return true; diff --git a/src/host/ut_host/InputBufferTests.cpp b/src/host/ut_host/InputBufferTests.cpp index f81d304bda..fd69126d6d 100644 --- a/src/host/ut_host/InputBufferTests.cpp +++ b/src/host/ut_host/InputBufferTests.cpp @@ -219,22 +219,19 @@ class InputBufferTests } } - TEST_METHOD(InputBufferDoesNotCoalesceFullWidthChars) + TEST_METHOD(InputBufferDoesNotCoalesceSurrogatePairs) { InputBuffer inputBuffer; - WCHAR hiraganaA = 0x3042; // U+3042 hiragana A - auto record = MakeKeyEvent(true, 1, hiraganaA, 0, hiraganaA, 0); - // send a bunch of identical events - inputBuffer.Flush(); - for (size_t i = 0; i < RECORD_INSERT_COUNT; ++i) - { - VERIFY_IS_GREATER_THAN(inputBuffer.Write(record), 0u); - VERIFY_ARE_EQUAL(inputBuffer._storage.back(), record); - } + // U+1F44D thumbs up emoji + inputBuffer.Write(MakeKeyEvent(true, 1, 0xD83D, 0, 0xD83D, 0)); + inputBuffer.Write(MakeKeyEvent(true, 1, 0xDC4D, 0, 0xDC4D, 0)); + + // Should not coalesce despite otherwise matching perfectly. + inputBuffer.Write(MakeKeyEvent(true, 1, 0xDC4D, 0, 0xDC4D, 0)); // The events shouldn't be coalesced - VERIFY_ARE_EQUAL(inputBuffer.GetNumberOfReadyEvents(), RECORD_INSERT_COUNT); + VERIFY_ARE_EQUAL(3u, inputBuffer.GetNumberOfReadyEvents()); } TEST_METHOD(CanFlushAllOutput)