Fix decoding of HTML entities in TSX/JSX (#35739)

This commit is contained in:
Ron Buckton
2019-12-17 17:32:48 -08:00
committed by GitHub
parent cafa175501
commit 2eb60c2cb2
6 changed files with 45 additions and 22 deletions

View File

@@ -1337,20 +1337,6 @@ namespace ts {
return utf16EncodeAsString(escapedValue);
}
/**
 * Turns a single Unicode code point into the equivalent JavaScript (UTF-16) string.
 * Mirrors the UTF16Encoding operation, section 10.1.1 of the ES6 spec.
 *
 * @param codePoint Code point to encode; asserted to lie within 0x0..0x10FFFF.
 * @returns One code unit for values up to 0xFFFF, otherwise a lead/trail surrogate pair.
 */
function utf16EncodeAsString(codePoint: number): string {
    Debug.assert(0x0 <= codePoint && codePoint <= 0x10FFFF);

    // Basic Multilingual Plane: the code point is its own UTF-16 code unit.
    const fitsInOneUnit = codePoint <= 0xFFFF;
    if (fitsInOneUnit) {
        return String.fromCharCode(codePoint);
    }

    // Supplementary planes: split the 20-bit offset into two 10-bit halves.
    const offset = codePoint - 0x10000;
    const leadSurrogate = 0xD800 + Math.floor(offset / 0x400);
    const trailSurrogate = 0xDC00 + (offset % 0x400);
    return String.fromCharCode(leadSurrogate, trailSurrogate);
}
// Current character is known to be a backslash. Check for Unicode escape of the form '\uXXXX'
// and return code point value if valid Unicode escape is found. Otherwise return -1.
function peekUnicodeEscape(): number {
@@ -2339,4 +2325,25 @@ namespace ts {
}
return 1;
}
/**
 * Manual code-point-to-string encoder built only on `String.fromCharCode`.
 * Derived from the 10.1.1 UTF16Encoding of the ES6 Spec.
 *
 * @param codePoint Code point to encode; asserted to be between 0x0 and 0x10FFFF inclusive.
 * @returns A string of one code unit (code point <= 0xFFFF) or two surrogate code units.
 */
function utf16EncodeAsStringFallback(codePoint: number) {
    Debug.assert(0x0 <= codePoint && codePoint <= 0x10FFFF);

    if (codePoint <= 0xFFFF) {
        // Representable directly as a single UTF-16 code unit.
        return String.fromCharCode(codePoint);
    }

    // Astral code point: 0x10000 is subtracted, then the high and low
    // 10 bits are mapped into the lead and trail surrogate ranges.
    const astralOffset = codePoint - 0x10000;
    const high = Math.floor(astralOffset / 0x400) + 0xD800;
    const low = (astralOffset % 0x400) + 0xDC00;
    return String.fromCharCode(high, low);
}
// Chosen once when this file is evaluated: use the runtime's `String.fromCodePoint`
// when it is present, otherwise use `utf16EncodeAsStringFallback`.
const utf16EncodeAsStringWorker: (codePoint: number) => string =
    (String as any).fromCodePoint
        ? codePoint => String.fromCodePoint(codePoint)
        : utf16EncodeAsStringFallback;
/**
 * Encodes a Unicode code point as a string by delegating to
 * `utf16EncodeAsStringWorker`.
 *
 * @param codePoint The code point to encode.
 * @returns Whatever string the worker produces for `codePoint`.
 */
/* @internal */
export function utf16EncodeAsString(codePoint: number) {
    const encoded = utf16EncodeAsStringWorker(codePoint);
    return encoded;
}
}

View File

@@ -253,15 +253,15 @@ namespace ts {
/**
 * Replaces character references of the form `&#DDDD;`, `&#xHHHH;` and `&name;`
 * in `text` with the characters they denote.
 *
 * Numeric references are encoded with `utf16EncodeAsString`, so code points above
 * 0xFFFF come out as a proper surrogate pair. (`String.fromCharCode` would instead
 * truncate the value to 16 bits and yield an unrelated character.)
 *
 * @param text Text that may contain character references.
 * @returns `text` with every recognized reference decoded. Unknown named references
 * and numeric references above 0x10FFFF are left exactly as written.
 */
function decodeEntities(text: string): string {
    // Capture groups: 1 = everything between `&` and `;`, 2 = `#` plus digits, 3 = the digits
    // (with the `x` prefix for hex), 4 = decimal digits, 5 = hex digits, 6 = entity name.
    return text.replace(/&((#((\d+)|x([\da-fA-F]+)))|(\w+));/g, (match, _all, _number, _digits, decimal, hex, word) => {
        if (decimal || hex) {
            const codePoint = decimal ? parseInt(decimal, 10) : parseInt(hex, 16);
            // Values above 0x10FFFF are not Unicode code points and the encoder rejects them,
            // so such a reference is left undecoded instead of failing or being truncated.
            return codePoint <= 0x10FFFF ? utf16EncodeAsString(codePoint) : match;
        }
        const ch = entities.get(word);
        // If this is not a valid entity, then just use `match` (replace it with itself, i.e. don't replace)
        return ch ? utf16EncodeAsString(ch) : match;
    });
}