Fix RTF generation for Unicode characters (#12586)

## Summary of the Pull Request
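Fixes RTF generation for text containing non-ASCII Unicode characters: each UTF-16 code unit above 127 is now emitted as an RTF `\uN?` escape instead of being written out as raw UTF-8 bytes.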

## PR Checklist
* [x] Closes #12379
* [x] CLA signed. If not, go over [here](https://cla.opensource.microsoft.com/microsoft/Terminal) and sign the CLA
* [x] Tests added/passed
* [ ] Documentation updated. If checked, please file a pull request on [our docs repo](https://github.com/MicrosoftDocs/terminal) and link it here: #xxx
* [ ] Schema updated.
* [ ] I've discussed this with core contributors already. If not checked, I'm ready to accept this work might be rejected in favor of a different grand plan. Issue number where discussion took place: #xxx

## Validation Steps Performed
Added some unit tests.

Ran the following in PowerShell and copied the emitted text into WordPad.
```pwsh
echo "This is some Ascii \ {}`nLow code units: á é í ó ú `u{2b81} `u{2b82}`nHigh code units: `u{a7b5} `u{a7b7}`nSurrogates: `u{1f366} `u{1f47e} `u{1f440}"
```
This commit is contained in:
Ian O'Neill 2022-03-01 23:14:16 +00:00 committed by GitHub
parent 79a08ecd18
commit 00113e3e48
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 14 deletions

View File

@ -2034,20 +2034,8 @@ std::string TextBuffer::GenRTF(const TextAndColor& rows, const int fontHeightPoi
const auto writeAccumulatedChars = [&](bool includeCurrent) {
if (col >= startOffset)
{
const auto unescapedText = ConvertToA(CP_UTF8, std::wstring_view(rows.text.at(row)).substr(startOffset, col - startOffset + includeCurrent));
for (const auto c : unescapedText)
{
switch (c)
{
case '\\':
case '{':
case '}':
contentBuilder << "\\" << c;
break;
default:
contentBuilder << c;
}
}
const auto text = std::wstring_view{ rows.text.at(row) }.substr(startOffset, col - startOffset + includeCurrent);
_AppendRTFText(contentBuilder, text);
startOffset = col;
}
@ -2146,6 +2134,31 @@ std::string TextBuffer::GenRTF(const TextAndColor& rows, const int fontHeightPoi
}
}
// Function Description:
// - Writes the given UTF-16 text to the RTF content stream, one code unit at a time.
//   Code units up to 127 are written as a single narrow character, with '\', '{'
//   and '}' prefixed by a backslash. Every other code unit is written as
//   "\u<N>?", where <N> is the code unit reinterpreted as a signed 16-bit
//   decimal number.
// Arguments:
// - contentBuilder - the stream receiving the RTF text.
// - text - the UTF-16 text to append.
// Return Value:
// - <none>
void TextBuffer::_AppendRTFText(std::ostringstream& contentBuilder, const std::wstring_view& text)
{
    for (const auto ch : text)
    {
        if (ch > 127)
        {
            // Windows uses unsigned wchar_t - RTF uses signed ones, so the
            // same 16 bits are reinterpreted rather than value-converted.
            const auto signedUnit = til::bit_cast<int16_t>(ch);
            contentBuilder << "\\u" << std::to_string(signedUnit) << "?";
            continue;
        }

        // These three characters need a leading backslash; everything else
        // in this range is passed through unchanged.
        const bool needsEscape = (ch == L'\\') || (ch == L'{') || (ch == L'}');
        if (needsEscape)
        {
            contentBuilder << "\\";
        }
        contentBuilder << gsl::narrow<char>(ch);
    }
}
// Function Description:
// - Reflow the contents from the old buffer into the new buffer. The new buffer
// can have different dimensions than the old buffer. If it does, then this

View File

@ -247,6 +247,8 @@ private:
void _PruneHyperlinks();
// Appends `text` to `contentBuilder` as RTF text: '\', '{' and '}' are
// backslash-escaped and code units above 127 are written as "\u<N>?" escapes.
static void _AppendRTFText(std::ostringstream& contentBuilder, const std::wstring_view& text);
std::unordered_map<size_t, std::wstring> _idsAndPatterns;
size_t _currentPatternId;

View File

@ -146,6 +146,8 @@ class TextBufferTests
TEST_METHOD(TestBurrito);
TEST_METHOD(TestAppendRTFText);
void WriteLinesToBuffer(const std::vector<std::wstring>& text, TextBuffer& buffer);
TEST_METHOD(GetWordBoundaries);
TEST_METHOD(MoveByWord);
@ -2011,6 +2013,37 @@ void TextBufferTests::TestBurrito()
VERIFY_IS_FALSE(afterBurritoIter);
}
// Exercises TextBuffer::_AppendRTFText with four kinds of input, each written
// to a fresh stream: plain ASCII containing the characters that get escaped,
// code units below 0x8000, code units at or above 0x8000, and surrogate pairs.
void TextBufferTests::TestAppendRTFText()
{
    {
        // ASCII passes through; backslash and braces gain a leading backslash.
        const std::wstring_view input{ L"This is some Ascii \\ {}" };
        std::ostringstream rtf;
        TextBuffer::_AppendRTFText(rtf, input);
        VERIFY_ARE_EQUAL("This is some Ascii \\\\ \\{\\}", rtf.str());
    }
    {
        // "Low code units: á é í ó ú ⮁ ⮂" in UTF-16.
        // These values are below 0x8000, so the expected escapes are positive.
        const std::wstring_view input{ L"Low code units: \x00E1 \x00E9 \x00ED \x00F3 \x00FA \x2B81 \x2B82" };
        std::ostringstream rtf;
        TextBuffer::_AppendRTFText(rtf, input);
        VERIFY_ARE_EQUAL("Low code units: \\u225? \\u233? \\u237? \\u243? \\u250? \\u11137? \\u11138?", rtf.str());
    }
    {
        // "High code units: ꞵ ꞷ" in UTF-16.
        // These values are at or above 0x8000, so the expected escapes are negative.
        const std::wstring_view input{ L"High code units: \xA7B5 \xA7B7" };
        std::ostringstream rtf;
        TextBuffer::_AppendRTFText(rtf, input);
        VERIFY_ARE_EQUAL("High code units: \\u-22603? \\u-22601?", rtf.str());
    }
    {
        // "Surrogates: 🍦 👾 👀" in UTF-16.
        // Each half of a surrogate pair is expected as its own escape.
        const std::wstring_view input{ L"Surrogates: \xD83C\xDF66 \xD83D\xDC7E \xD83D\xDC40" };
        std::ostringstream rtf;
        TextBuffer::_AppendRTFText(rtf, input);
        VERIFY_ARE_EQUAL("Surrogates: \\u-10180?\\u-8346? \\u-10179?\\u-9090? \\u-10179?\\u-9152?", rtf.str());
    }
}
void TextBufferTests::WriteLinesToBuffer(const std::vector<std::wstring>& text, TextBuffer& buffer)
{
const auto bufferSize = buffer.GetSize();