From a81bf8cdadeb351bc048ff096b246a8ccd48c527 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Fri, 27 Feb 2015 11:33:39 -0800 Subject: [PATCH] Emit non-ASCII characters with unicode escape sequences. --- src/compiler/core.ts | 23 --------- src/compiler/emitter.ts | 7 ++- src/compiler/utilities.ts | 51 ++++++++++++++++++- ...taggedTemplateStringsWithUnicodeEscapes.js | 2 +- .../templateStringWhitespaceEscapes2.js | 2 +- ...unicodeExtendedEscapesInTemplates06_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates08_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates09_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates10_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates11_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates13_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates15_ES5.js | 2 +- ...unicodeExtendedEscapesInTemplates16_ES5.js | 2 +- 13 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/compiler/core.ts b/src/compiler/core.ts index e27d3cb5207..2518b95d37e 100644 --- a/src/compiler/core.ts +++ b/src/compiler/core.ts @@ -623,29 +623,6 @@ module ts { "\u0085": "\\u0085" // nextLine }; - /** - * Based heavily on the abstract 'Quote'/ 'QuoteJSONString' operation from ECMA-262 (24.3.2.2), - * but augmented for a few select characters. - * Note that this doesn't actually wrap the input in double quotes. - */ - export function escapeString(s: string): string { - // Prioritize '"' and '\' - s = backslashOrDoubleQuote.test(s) ? s.replace(backslashOrDoubleQuote, getReplacement) : s; - s = escapedCharsRegExp.test(s) ? 
s.replace(escapedCharsRegExp, getReplacement) : s; - - return s; - - function getReplacement(c: string) { - return escapedCharsMap[c] || unicodeEscape(c); - } - - function unicodeEscape(c: string): string { - var hexCharCode = c.charCodeAt(0).toString(16); - var paddedHexCode = ("0000" + hexCharCode).slice(-4); - return "\\u" + paddedHexCode; - } - } - export function getDefaultLibFileName(options: CompilerOptions): string { return options.target === ScriptTarget.ES6 ? "lib.es6.d.ts" : "lib.d.ts"; } diff --git a/src/compiler/emitter.ts b/src/compiler/emitter.ts index e57255a776b..777968ee683 100644 --- a/src/compiler/emitter.ts +++ b/src/compiler/emitter.ts @@ -2201,9 +2201,12 @@ module ts { write(text); } } - + function getTemplateLiteralAsStringLiteral(node: LiteralExpression): string { - return '"' + escapeString(node.text) + '"'; + var result = escapeString(node.text); + result = replaceNonAsciiCharacters(result); + + return '"' + result + '"'; } function emitDownlevelRawTemplateLiteral(node: LiteralExpression) { diff --git a/src/compiler/utilities.ts b/src/compiler/utilities.ts index b53c0007248..51981db0163 100644 --- a/src/compiler/utilities.ts +++ b/src/compiler/utilities.ts @@ -1120,7 +1120,7 @@ module ts { newEndN = Math.max(newEnd2, newEnd2 + (newEnd1 - oldEnd2)); } - return createTextChangeRange(createTextSpanFromBounds(oldStartN, oldEndN), /*newLength: */newEndN - oldStartN); + return createTextChangeRange(createTextSpanFromBounds(oldStartN, oldEndN), /*newLength: */ newEndN - oldStartN); } // @internal @@ -1202,4 +1202,53 @@ module ts { } } } + + var backslashOrDoubleQuote = /[\"\\]/g; + var escapedCharsRegExp = /[\u0000-\u001f\t\v\f\b\r\n\u2028\u2029\u0085]/g; + var escapedCharsMap: Map<string> = { + "\0": "\\0", + "\t": "\\t", + "\v": "\\v", + "\f": "\\f", + "\b": "\\b", + "\r": "\\r", + "\n": "\\n", + "\\": "\\\\", + "\"": "\\\"", + "\u2028": "\\u2028", // lineSeparator + "\u2029": "\\u2029", // paragraphSeparator + "\u0085": "\\u0085" // nextLine + }; + + 
/** + * Based heavily on the abstract 'Quote'/ 'QuoteJSONString' operation from ECMA-262 (24.3.2.2), + * but augmented for a few select characters. + * Note that this doesn't actually wrap the input in double quotes. + */ + export function escapeString(s: string): string { + // Prioritize '"' and '\' + s = backslashOrDoubleQuote.test(s) ? s.replace(backslashOrDoubleQuote, getReplacement) : s; + s = escapedCharsRegExp.test(s) ? s.replace(escapedCharsRegExp, getReplacement) : s; + + return s; + + function getReplacement(c: string) { + return escapedCharsMap[c] || get16BitUnicodeEscapeSequence(c.charCodeAt(0)); + } + } + + function get16BitUnicodeEscapeSequence(charCode: number): string { + var hexCharCode = charCode.toString(16); + var paddedHexCode = ("0000" + hexCharCode).slice(-4); + return "\\u" + paddedHexCode; + } + + var nonAsciiCharacters = /[^\u0000-\u007F]/g; + export function replaceNonAsciiCharacters(s: string): string { + // Replace non-ASCII characters with '\uNNNN' escapes if any exist. + // Otherwise just return the original string. + return nonAsciiCharacters.test(s) ? 
+ s.replace(nonAsciiCharacters, c => get16BitUnicodeEscapeSequence(c.charCodeAt(0))) : + s; + } } \ No newline at end of file diff --git a/tests/baselines/reference/taggedTemplateStringsWithUnicodeEscapes.js b/tests/baselines/reference/taggedTemplateStringsWithUnicodeEscapes.js index e2f6b608f96..bf4501804a6 100644 --- a/tests/baselines/reference/taggedTemplateStringsWithUnicodeEscapes.js +++ b/tests/baselines/reference/taggedTemplateStringsWithUnicodeEscapes.js @@ -11,5 +11,5 @@ function f() { args[_i - 0] = arguments[_i]; } } -(_a = ["'💩'", "'💩'"], _a.raw = ["'\\u{1f4a9}'", "'\\uD83D\\uDCA9'"], f(_a, " should be converted to ")); +(_a = ["'\ud83d\udca9'", "'\ud83d\udca9'"], _a.raw = ["'\\u{1f4a9}'", "'\\uD83D\\uDCA9'"], f(_a, " should be converted to ")); var _a; diff --git a/tests/baselines/reference/templateStringWhitespaceEscapes2.js b/tests/baselines/reference/templateStringWhitespaceEscapes2.js index 144b1e78b8b..d836bb11b80 100644 --- a/tests/baselines/reference/templateStringWhitespaceEscapes2.js +++ b/tests/baselines/reference/templateStringWhitespaceEscapes2.js @@ -6,4 +6,4 @@ //// [templateStringWhitespaceEscapes2.js] // , , , , , -"\t\v\f  "; +"\t\v\f \u00a0\ufeff"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates06_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates06_ES5.js index cb470d5adf6..cf3eadfeb77 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates06_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates06_ES5.js @@ -8,4 +8,4 @@ var x = `\u{10FFFF}`; //// [unicodeExtendedEscapesInTemplates06_ES5.js] // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 1. Assert: 0 ≤ cp ≤ 0x10FFFF. 
-var x = "􏿿"; +var x = "\udbff\udfff"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates08_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates08_ES5.js index 03445e734b1..5744dfb8e4f 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates08_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates08_ES5.js @@ -10,4 +10,4 @@ var x = `\u{FFFF}`; // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (FFFF == 65535) -var x = "￿"; +var x = "\uffff"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates09_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates09_ES5.js index 8c18768a229..d2bfe6a45f4 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates09_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates09_ES5.js @@ -10,4 +10,4 @@ var x = `\u{10000}`; // ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp) // 2. If cp ≤ 65535, return cp. // (10000 == 65536) -var x = "𐀀"; +var x = "\ud800\udc00"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates10_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates10_ES5.js index 68693d6fd42..ed8d60f3b69 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates10_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates10_ES5.js @@ -12,4 +12,4 @@ var x = `\u{D800}`; // 2. Let cu1 be floor((cp – 65536) / 1024) + 0xD800. // Although we should just get back a single code point value of 0xD800, // this is a useful edge-case test. 
-var x = "�"; +var x = "\ud800"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates11_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates11_ES5.js index 336030c54fc..f6bd2d76437 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates11_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates11_ES5.js @@ -12,4 +12,4 @@ var x = `\u{DC00}`; // 2. Let cu2 be ((cp – 65536) modulo 1024) + 0xDC00. // Although we should just get back a single code point value of 0xDC00, // this is a useful edge-case test. -var x = "�"; +var x = "\udc00"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates13_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates13_ES5.js index f59110e062c..7de26d0ed22 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates13_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates13_ES5.js @@ -4,4 +4,4 @@ var x = `\u{DDDDD}`; //// [unicodeExtendedEscapesInTemplates13_ES5.js] -var x = "󝷝"; +var x = "\udb37\udddd"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates15_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates15_ES5.js index 91b928af877..ec40b217870 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates15_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates15_ES5.js @@ -4,4 +4,4 @@ var x = `\u{abcd}\u{ef12}\u{3456}\u{7890}`; //// [unicodeExtendedEscapesInTemplates15_ES5.js] -var x = "ꯍ㑖碐"; +var x = "\uabcd\uef12\u3456\u7890"; diff --git a/tests/baselines/reference/unicodeExtendedEscapesInTemplates16_ES5.js b/tests/baselines/reference/unicodeExtendedEscapesInTemplates16_ES5.js index 6333eb753b5..e3464ce05fb 100644 --- a/tests/baselines/reference/unicodeExtendedEscapesInTemplates16_ES5.js +++ b/tests/baselines/reference/unicodeExtendedEscapesInTemplates16_ES5.js @@ -4,4 +4,4 @@ var x = `\u{ABCD}\u{EF12}\u{3456}\u{7890}`; //// 
[unicodeExtendedEscapesInTemplates16_ES5.js] -var x = "ꯍ㑖碐"; +var x = "\uabcd\uef12\u3456\u7890";