Fix RTF generation for Unicode characters (#12586)

## Summary of the Pull Request Fixes RTF generation for text with Unicode characters. ## PR Checklist * [x] Closes #12379 * [x] CLA signed. If not, go over [here](https://cla.opensource.microsoft.com/microsoft/Terminal) and sign the CLA * [x] Tests added/passed * [ ] Documentation updated. If checked, please file a pull request on [our docs repo](https://github.com/MicrosoftDocs/terminal) and link it here: #xxx * [ ] Schema updated. * [ ] I've discussed this with core contributors already. If not checked, I'm ready to accept this work might be rejected in favor of a different grand plan. Issue number where discussion took place: #xxx ## Validation Steps Performed Added some unit tests. Ran the following in PowerShell and copied the emitted text into WordPad. ```pwsh echo "This is some Ascii \ {}`nLow code units: á é í ó ú `u{2b81} `u{2b82}`nHigh code units: `u{a7b5} `u{a7b7}`nSurrogates: `u{1f366} `u{1f47e} `u{1f440}" ```
2025-12-10 18:43:54 -06:00 · 2022-03-01 23:14:16 +00:00 · 2022-03-01 23:14:16 +00:00 · 00113e3e48
commit 00113e3e48
parent 79a08ecd18
3 changed files with 62 additions and 14 deletions
--- a/src/buffer/out/textBuffer.cpp
+++ b/src/buffer/out/textBuffer.cpp
@ -2034,20 +2034,8 @@ std::string TextBuffer::GenRTF(const TextAndColor& rows, const int fontHeightPoi
                const auto writeAccumulatedChars = [&](bool includeCurrent) {
                    if (col >= startOffset)
                    {
-                        const auto unescapedText = ConvertToA(CP_UTF8, std::wstring_view(rows.text.at(row)).substr(startOffset, col - startOffset + includeCurrent));
-                        for (const auto c : unescapedText)
-                        {
-                            switch (c)
-                            {
-                            case '\\':
-                            case '{':
-                            case '}':
-                                contentBuilder << "\\" << c;
-                                break;
-                            default:
-                                contentBuilder << c;
-                            }
-                        }
+                        const auto text = std::wstring_view{ rows.text.at(row) }.substr(startOffset, col - startOffset + includeCurrent);
+                        _AppendRTFText(contentBuilder, text);

                        startOffset = col;
                    }
@ -2146,6 +2134,31 @@ std::string TextBuffer::GenRTF(const TextAndColor& rows, const int fontHeightPoi
    }
 }

+void TextBuffer::_AppendRTFText(std::ostringstream& contentBuilder, const std::wstring_view& text)
+{
+    for (const auto codeUnit : text)
+    {
+        if (codeUnit <= 127)
+        {
+            switch (codeUnit)
+            {
+            case L'\\':
+            case L'{':
+            case L'}':
+                contentBuilder << "\\" << gsl::narrow<char>(codeUnit);
+                break;
+            default:
+                contentBuilder << gsl::narrow<char>(codeUnit);
+            }
+        }
+        else
+        {
+            // Windows uses unsigned wchar_t - RTF uses signed ones.
+            contentBuilder << "\\u" << std::to_string(til::bit_cast<int16_t>(codeUnit)) << "?";
+        }
+    }
+}
+
 // Function Description:
 // - Reflow the contents from the old buffer into the new buffer. The new buffer
 //   can have different dimensions than the old buffer. If it does, then this
--- a/src/buffer/out/textBuffer.hpp
+++ b/src/buffer/out/textBuffer.hpp
@ -247,6 +247,8 @@ private:

    void _PruneHyperlinks();

+    static void _AppendRTFText(std::ostringstream& contentBuilder, const std::wstring_view& text);
+
    std::unordered_map<size_t, std::wstring> _idsAndPatterns;
    size_t _currentPatternId;

--- a/src/host/ut_host/TextBufferTests.cpp
+++ b/src/host/ut_host/TextBufferTests.cpp
@ -146,6 +146,8 @@ class TextBufferTests

    TEST_METHOD(TestBurrito);

+    TEST_METHOD(TestAppendRTFText);
+
    void WriteLinesToBuffer(const std::vector<std::wstring>& text, TextBuffer& buffer);
    TEST_METHOD(GetWordBoundaries);
    TEST_METHOD(MoveByWord);
@ -2011,6 +2013,37 @@ void TextBufferTests::TestBurrito()
    VERIFY_IS_FALSE(afterBurritoIter);
 }

+void TextBufferTests::TestAppendRTFText()
+{
+    {
+        std::ostringstream contentStream;
+        const auto ascii = L"This is some Ascii \\ {}";
+        TextBuffer::_AppendRTFText(contentStream, ascii);
+        VERIFY_ARE_EQUAL("This is some Ascii \\\\ \\{\\}", contentStream.str());
+    }
+    {
+        std::ostringstream contentStream;
+        // "Low code units: á é í ó ú ⮁ ⮂" in UTF-16
+        const auto lowCodeUnits = L"Low code units: \x00E1 \x00E9 \x00ED \x00F3 \x00FA \x2B81 \x2B82";
+        TextBuffer::_AppendRTFText(contentStream, lowCodeUnits);
+        VERIFY_ARE_EQUAL("Low code units: \\u225? \\u233? \\u237? \\u243? \\u250? \\u11137? \\u11138?", contentStream.str());
+    }
+    {
+        std::ostringstream contentStream;
+        // "High code units: ꞵ ꞷ" in UTF-16
+        const auto highCodeUnits = L"High code units: \xA7B5 \xA7B7";
+        TextBuffer::_AppendRTFText(contentStream, highCodeUnits);
+        VERIFY_ARE_EQUAL("High code units: \\u-22603? \\u-22601?", contentStream.str());
+    }
+    {
+        std::ostringstream contentStream;
+        // "Surrogates: 🍦 👾 👀" in UTF-16
+        const auto surrogates = L"Surrogates: \xD83C\xDF66 \xD83D\xDC7E \xD83D\xDC40";
+        TextBuffer::_AppendRTFText(contentStream, surrogates);
+        VERIFY_ARE_EQUAL("Surrogates: \\u-10180?\\u-8346? \\u-10179?\\u-9090? \\u-10179?\\u-9152?", contentStream.str());
+    }
+}
+
 void TextBufferTests::WriteLinesToBuffer(const std::vector<std::wstring>& text, TextBuffer& buffer)
 {
    const auto bufferSize = buffer.GetSize();