Emit non-ASCII characters with unicode escape sequences.

This commit is contained in:
Daniel Rosenwasser 2015-02-27 11:33:39 -08:00
parent bbf9579021
commit a81bf8cdad
13 changed files with 65 additions and 36 deletions

View File

@ -623,29 +623,6 @@ module ts {
"\u0085": "\\u0085" // nextLine
};
/**
 * Based heavily on the abstract 'Quote'/ 'QuoteJSONString' operation from ECMA-262 (24.3.2.2),
 * but augmented for a few select characters.
 * Note that this doesn't actually wrap the input in double quotes.
 *
 * @param s The raw string contents to escape.
 * @returns The contents with '"', '\' and every character matched by
 *          escapedCharsRegExp replaced by an escape sequence.
 */
export function escapeString(s: string): string {
    // Prioritize '"' and '\': the escapes produced by the second pass contain
    // backslashes themselves, so backslashes in the input must be handled first.
    s = backslashOrDoubleQuote.test(s) ? s.replace(backslashOrDoubleQuote, getReplacement) : s;
    s = escapedCharsRegExp.test(s) ? s.replace(escapedCharsRegExp, getReplacement) : s;
    return s;

    function getReplacement(c: string, offset: number, input: string) {
        if (c === "\0") {
            // '\0' directly followed by a decimal digit would be re-read as a
            // legacy octal escape (e.g. '\01'), so use the hex form instead.
            // charCodeAt yields NaN past the end, which fails both comparisons.
            var next = input.charCodeAt(offset + 1);
            if (next >= 0x30 && next <= 0x39) {
                return "\\x00";
            }
        }
        return escapedCharsMap[c] || unicodeEscape(c);
    }

    // Formats the first UTF-16 code unit of 'c' as a zero-padded '\uNNNN' escape.
    function unicodeEscape(c: string): string {
        var hexCharCode = c.charCodeAt(0).toString(16);
        var paddedHexCode = ("0000" + hexCharCode).slice(-4);
        return "\\u" + paddedHexCode;
    }
}
/**
 * Picks the file name of the default library declaration file:
 * "lib.es6.d.ts" when the options target ES6, "lib.d.ts" for anything else.
 */
export function getDefaultLibFileName(options: CompilerOptions): string {
    if (options.target === ScriptTarget.ES6) {
        return "lib.es6.d.ts";
    }
    return "lib.d.ts";
}

View File

@ -2201,9 +2201,12 @@ module ts {
write(text);
}
}
/**
 * Builds the double-quoted string literal text for a template literal node.
 *
 * @param node The literal whose 'text' is converted.
 * @returns The node's text, escaped and wrapped in double quotes.
 */
function getTemplateLiteralAsStringLiteral(node: LiteralExpression): string {
    // Escape quotes, backslashes and control characters first; then rewrite every
    // remaining non-ASCII code unit as '\uNNNN' so the emitted literal is pure ASCII.
    var result = escapeString(node.text);
    result = replaceNonAsciiCharacters(result);
    return '"' + result + '"';
}
function emitDownlevelRawTemplateLiteral(node: LiteralExpression) {

View File

@ -1120,7 +1120,7 @@ module ts {
newEndN = Math.max(newEnd2, newEnd2 + (newEnd1 - oldEnd2));
}
return createTextChangeRange(createTextSpanFromBounds(oldStartN, oldEndN), /*newLength: */newEndN - oldStartN);
return createTextChangeRange(createTextSpanFromBounds(oldStartN, oldEndN), /*newLength: */ newEndN - oldStartN);
}
// @internal
@ -1202,4 +1202,53 @@ module ts {
}
}
}
// The two characters that must be escaped before anything else (see escapeString).
var backslashOrDoubleQuote = /[\"\\]/g;
// Control characters U+0000-U+001F plus the line separator, paragraph separator and next-line characters.
var escapedCharsRegExp = /[\u0000-\u001f\t\v\f\b\r\n\u2028\u2029\u0085]/g;
// Preferred escape for specific characters; anything matched by the regular
// expressions above but missing here falls back to a '\uNNNN' escape.
// Typed as a plain string-indexed object so the table does not depend on any
// declaration outside this block.
var escapedCharsMap: { [character: string]: string } = {
    "\0": "\\0",
    "\t": "\\t",
    "\v": "\\v",
    "\f": "\\f",
    "\b": "\\b",
    "\r": "\\r",
    "\n": "\\n",
    "\\": "\\\\",
    "\"": "\\\"",
    "\u2028": "\\u2028", // lineSeparator
    "\u2029": "\\u2029", // paragraphSeparator
    "\u0085": "\\u0085" // nextLine
};

/**
 * Based heavily on the abstract 'Quote'/ 'QuoteJSONString' operation from ECMA-262 (24.3.2.2),
 * but augmented for a few select characters.
 * Note that this doesn't actually wrap the input in double quotes.
 *
 * @param s The raw string contents to escape.
 * @returns The contents with '"', '\' and every character matched by
 *          escapedCharsRegExp replaced by an escape sequence.
 */
export function escapeString(s: string): string {
    // Prioritize '"' and '\': the escapes produced by the second pass contain
    // backslashes themselves, so backslashes in the input must be handled first.
    s = backslashOrDoubleQuote.test(s) ? s.replace(backslashOrDoubleQuote, getReplacement) : s;
    s = escapedCharsRegExp.test(s) ? s.replace(escapedCharsRegExp, getReplacement) : s;
    return s;

    function getReplacement(c: string, offset: number, input: string) {
        if (c === "\0") {
            // '\0' directly followed by a decimal digit would be re-read as a
            // legacy octal escape (e.g. '\01'), so use the hex form instead.
            // charCodeAt yields NaN past the end, which fails both comparisons.
            var next = input.charCodeAt(offset + 1);
            if (next >= 0x30 && next <= 0x39) {
                return "\\x00";
            }
        }
        return escapedCharsMap[c] || get16BitUnicodeEscapeSequence(c.charCodeAt(0));
    }
}

/**
 * Formats a UTF-16 code unit as a '\uNNNN' escape with four lowercase hex digits.
 *
 * @param charCode The code unit value; only the last four hex digits are kept.
 */
function get16BitUnicodeEscapeSequence(charCode: number): string {
    var hexCharCode = charCode.toString(16);
    // Left-pad with zeros, then keep exactly four digits.
    var paddedHexCode = ("0000" + hexCharCode).slice(-4);
    return "\\u" + paddedHexCode;
}
// Matches every UTF-16 code unit above U+007F.
var nonAsciiCharacters = /[^\u0000-\u007F]/g;

/**
 * Rewrites each non-ASCII UTF-16 code unit of 's' as a '\uNNNN' escape.
 * A string that contains none is returned unchanged.
 */
export function replaceNonAsciiCharacters(s: string): string {
    // Nothing outside ASCII: hand back the original string.
    if (!nonAsciiCharacters.test(s)) {
        return s;
    }
    return s.replace(nonAsciiCharacters, toEscapeSequence);

    function toEscapeSequence(codeUnit: string): string {
        return get16BitUnicodeEscapeSequence(codeUnit.charCodeAt(0));
    }
}
}

View File

@ -11,5 +11,5 @@ function f() {
args[_i - 0] = arguments[_i];
}
}
(_a = ["'💩'", "'💩'"], _a.raw = ["'\\u{1f4a9}'", "'\\uD83D\\uDCA9'"], f(_a, " should be converted to "));
(_a = ["'\ud83d\udca9'", "'\ud83d\udca9'"], _a.raw = ["'\\u{1f4a9}'", "'\\uD83D\\uDCA9'"], f(_a, " should be converted to "));
var _a;

View File

@ -6,4 +6,4 @@
//// [templateStringWhitespaceEscapes2.js]
// <TAB>, <VT>, <FF>, <SP>, <NBSP>, <BOM>
"\t\v\f  ";
"\t\v\f \u00a0\ufeff";

View File

@ -8,4 +8,4 @@ var x = `\u{10FFFF}`;
//// [unicodeExtendedEscapesInTemplates06_ES5.js]
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 1. Assert: 0 ≤ cp ≤ 0x10FFFF.
var x = "􏿿";
var x = "\udbff\udfff";

View File

@ -10,4 +10,4 @@ var x = `\u{FFFF}`;
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (FFFF == 65535)
var x = "￿";
var x = "\uffff";

View File

@ -10,4 +10,4 @@ var x = `\u{10000}`;
// ES6 Spec - 10.1.1 Static Semantics: UTF16Encoding (cp)
// 2. If cp ≤ 65535, return cp.
// (10000 == 65536)
var x = "𐀀";
var x = "\ud800\udc00";

View File

@ -12,4 +12,4 @@ var x = `\u{D800}`;
// 2. Let cu1 be floor((cp - 65536) / 1024) + 0xD800.
// Although we should just get back a single code point value of 0xD800,
// this is a useful edge-case test.
var x = "<EFBFBD>";
var x = "\ud800";

View File

@ -12,4 +12,4 @@ var x = `\u{DC00}`;
// 2. Let cu2 be ((cp - 65536) modulo 1024) + 0xDC00.
// Although we should just get back a single code point value of 0xDC00,
// this is a useful edge-case test.
var x = "<EFBFBD>";
var x = "\udc00";

View File

@ -4,4 +4,4 @@ var x = `\u{DDDDD}`;
//// [unicodeExtendedEscapesInTemplates13_ES5.js]
var x = "󝷝";
var x = "\udb37\udddd";

View File

@ -4,4 +4,4 @@ var x = `\u{abcd}\u{ef12}\u{3456}\u{7890}`;
//// [unicodeExtendedEscapesInTemplates15_ES5.js]
var x = "ꯍ㑖碐";
var x = "\uabcd\uef12\u3456\u7890";

View File

@ -4,4 +4,4 @@ var x = `\u{ABCD}\u{EF12}\u{3456}\u{7890}`;
//// [unicodeExtendedEscapesInTemplates16_ES5.js]
var x = "ꯍ㑖碐";
var x = "\uabcd\uef12\u3456\u7890";