Adjust isIdentifierText to skip multiple characters when a code point is multiple chars long (#32720)

* Adjust isIdentifierText to skip multiple characters when a code point is multiple chars long

* Add a few examples with mixed unicode characters

* for posterity, add some unicode cursive script characters

* Test some more planes more explicitly
This commit is contained in:
Wesley Wigham
2019-08-05 23:37:26 -07:00
committed by GitHub
parent 624d1cad93
commit 7adc175dfc
5 changed files with 278 additions and 10 deletions

View File

@@ -832,12 +832,13 @@ namespace ts {
/* @internal */
export function isIdentifierText(name: string, languageVersion: ScriptTarget | undefined): boolean {
if (!isIdentifierStart(name.charCodeAt(0), languageVersion)) {
let ch = codePointAt(name, 0);
if (!isIdentifierStart(ch, languageVersion)) {
return false;
}
for (let i = 1; i < name.length; i++) {
if (!isIdentifierPart(name.charCodeAt(i), languageVersion)) {
for (let i = charSize(ch); i < name.length; i += charSize(ch)) {
if (!isIdentifierPart(ch = codePointAt(name, i), languageVersion)) {
return false;
}
}
@@ -1870,13 +1871,6 @@ namespace ts {
}
}
/**
 * Returns how many UTF-16 code units (string indices) the code point `ch`
 * occupies, so callers can advance a string position past it.
 *
 * Code points in the Basic Multilingual Plane (below 0x10000) take one code
 * unit; every supplementary code point, starting at 0x10000 itself, is
 * encoded as a surrogate pair and takes two.
 *
 * @param ch A Unicode code point.
 * @returns 2 for supplementary-plane code points, otherwise 1.
 */
function charSize(ch: number): number {
    // `>=`, not `>`: U+10000 is the first code point that needs a surrogate pair.
    return ch >= 0x10000 ? 2 : 1;
}
function reScanGreaterToken(): SyntaxKind {
if (token === SyntaxKind.GreaterThanToken) {
if (text.charCodeAt(pos) === CharacterCodes.greaterThan) {
@@ -2238,4 +2232,12 @@ namespace ts {
}
return first;
};
/**
 * Returns how many UTF-16 code units (string indices) the code point `ch`
 * occupies, so callers can advance a string position past it.
 *
 * Code points in the Basic Multilingual Plane (below 0x10000) take one code
 * unit; every supplementary code point, starting at 0x10000 itself, is
 * encoded as a surrogate pair and takes two.
 *
 * @param ch A Unicode code point.
 * @returns 2 for supplementary-plane code points, otherwise 1.
 */
/* @internal */
function charSize(ch: number): number {
    // `>=`, not `>`: U+10000 is the first code point that needs a surrogate pair.
    return ch >= 0x10000 ? 2 : 1;
}
}

View File

@@ -2,9 +2,64 @@
const 𝑚 = 4;
const 𝑀 = 5;
console.log(𝑀 + 𝑚); // 9
// lower 8 bits look like 'a'
const ၡ = 6;
console.log(ၡ ** ၡ);
// lower 8 bits aren't a valid unicode character
const ဒ = 7;
console.log(ဒ ** ဒ);
// a mix, for good measure
const ဒၡ𝑀 = 7;
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
const ၡ𝑀ဒ = 7;
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
const 𝑀ဒၡ = 7;
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
const Ɐⱱ = "ok"; // BMP
const 𓀸𓀹𓀺 = "ok"; // SMP
const 𡚭𡚮𡚯 = "ok"; // SIP
const 𡚭𓀺ⱱ𝓮 = "ok";
const 𓀺ⱱ𝓮𡚭 = "ok";
const ⱱ𝓮𡚭𓀺 = "ok";
const 𝓮𡚭𓀺ⱱ = "ok";
//// [extendedUnicodePlaneIdentifiers.js]
const 𝑚 = 4;
const 𝑀 = 5;
console.log(𝑀 + 𝑚); // 9
// lower 8 bits look like 'a'
const ၡ = 6;
console.log(ၡ ** ၡ);
// lower 8 bits aren't a valid unicode character
const ဒ = 7;
console.log(ဒ ** ဒ);
// a mix, for good measure
const ဒၡ𝑀 = 7;
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
const ၡ𝑀ဒ = 7;
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
const 𝑀ဒၡ = 7;
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
const Ɐⱱ = "ok"; // BMP
const 𓀸𓀹𓀺 = "ok"; // SMP
const 𡚭𡚮𡚯 = "ok"; // SIP
const 𡚭𓀺ⱱ𝓮 = "ok";
const 𓀺ⱱ𝓮𡚭 = "ok";
const ⱱ𝓮𡚭𓀺 = "ok";
const 𝓮𡚭𓀺ⱱ = "ok";

View File

@@ -12,3 +12,80 @@ console.log(𝑀 + 𝑚); // 9
>𝑀 : Symbol(𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 1, 5))
>𝑚 : Symbol(𝑚, Decl(extendedUnicodePlaneIdentifiers.ts, 0, 5))
// lower 8 bits look like 'a'
const ၡ = 6;
>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5))
console.log(ၡ ** ၡ);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5))
>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5))
// lower 8 bits aren't a valid unicode character
const ဒ = 7;
>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5))
console.log(ဒ ** ဒ);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5))
>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5))
// a mix, for good measure
const ဒၡ𝑀 = 7;
>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5))
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5))
>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5))
const ၡ𝑀ဒ = 7;
>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5))
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5))
>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5))
const 𝑀ဒၡ = 7;
>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5))
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5))
>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5))
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
>𝓱𝓮𝓵𝓵𝓸 : Symbol(𝓱𝓮𝓵𝓵𝓸, Decl(extendedUnicodePlaneIdentifiers.ts, 22, 5))
const Ɐⱱ = "ok"; // BMP
>Ɐⱱ : Symbol(Ɐⱱ, Decl(extendedUnicodePlaneIdentifiers.ts, 24, 5))
const 𓀸𓀹𓀺 = "ok"; // SMP
>𓀸𓀹𓀺 : Symbol(𓀸𓀹𓀺, Decl(extendedUnicodePlaneIdentifiers.ts, 26, 5))
const 𡚭𡚮𡚯 = "ok"; // SIP
>𡚭𡚮𡚯 : Symbol(𡚭𡚮𡚯, Decl(extendedUnicodePlaneIdentifiers.ts, 28, 5))
const 𡚭𓀺ⱱ𝓮 = "ok";
>𡚭𓀺ⱱ𝓮 : Symbol(𡚭𓀺ⱱ𝓮, Decl(extendedUnicodePlaneIdentifiers.ts, 30, 5))
const 𓀺ⱱ𝓮𡚭 = "ok";
>𓀺ⱱ𝓮𡚭 : Symbol(𓀺ⱱ𝓮𡚭, Decl(extendedUnicodePlaneIdentifiers.ts, 32, 5))
const ⱱ𝓮𡚭𓀺 = "ok";
>ⱱ𝓮𡚭𓀺 : Symbol(ⱱ𝓮𡚭𓀺, Decl(extendedUnicodePlaneIdentifiers.ts, 34, 5))
const 𝓮𡚭𓀺ⱱ = "ok";
>𝓮𡚭𓀺ⱱ : Symbol(𝓮𡚭𓀺ⱱ, Decl(extendedUnicodePlaneIdentifiers.ts, 36, 5))

View File

@@ -16,3 +16,103 @@ console.log(𝑀 + 𝑚); // 9
>𝑀 : 5
>𝑚 : 4
// lower 8 bits look like 'a'
const ၡ = 6;
>ၡ : 6
>6 : 6
console.log(ၡ ** ၡ);
>console.log(ၡ ** ၡ) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>ၡ ** ၡ : number
>ၡ : 6
>ၡ : 6
// lower 8 bits aren't a valid unicode character
const ဒ = 7;
>ဒ : 7
>7 : 7
console.log(ဒ ** ဒ);
>console.log(ဒ ** ဒ) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>ဒ ** ဒ : number
>ဒ : 7
>ဒ : 7
// a mix, for good measure
const ဒၡ𝑀 = 7;
>ဒၡ𝑀 : 7
>7 : 7
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
>console.log(ဒၡ𝑀 ** ဒၡ𝑀) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>ဒၡ𝑀 ** ဒၡ𝑀 : number
>ဒၡ𝑀 : 7
>ဒၡ𝑀 : 7
const ၡ𝑀ဒ = 7;
>ၡ𝑀ဒ : 7
>7 : 7
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
>console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>ၡ𝑀ဒ ** ၡ𝑀ဒ : number
>ၡ𝑀ဒ : 7
>ၡ𝑀ဒ : 7
const 𝑀ဒၡ = 7;
>𝑀ဒၡ : 7
>7 : 7
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
>console.log(𝑀ဒၡ ** 𝑀ဒၡ) : void
>console.log : (message?: any, ...optionalParams: any[]) => void
>console : Console
>log : (message?: any, ...optionalParams: any[]) => void
>𝑀ဒၡ ** 𝑀ဒၡ : number
>𝑀ဒၡ : 7
>𝑀ဒၡ : 7
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
>𝓱𝓮𝓵𝓵𝓸 : "𝔀𝓸𝓻𝓵𝓭"
>"𝔀𝓸𝓻𝓵𝓭" : "𝔀𝓸𝓻𝓵𝓭"
const Ɐⱱ = "ok"; // BMP
>Ɐⱱ : "ok"
>"ok" : "ok"
const 𓀸𓀹𓀺 = "ok"; // SMP
>𓀸𓀹𓀺 : "ok"
>"ok" : "ok"
const 𡚭𡚮𡚯 = "ok"; // SIP
>𡚭𡚮𡚯 : "ok"
>"ok" : "ok"
const 𡚭𓀺ⱱ𝓮 = "ok";
>𡚭𓀺ⱱ𝓮 : "ok"
>"ok" : "ok"
const 𓀺ⱱ𝓮𡚭 = "ok";
>𓀺ⱱ𝓮𡚭 : "ok"
>"ok" : "ok"
const ⱱ𝓮𡚭𓀺 = "ok";
>ⱱ𝓮𡚭𓀺 : "ok"
>"ok" : "ok"
const 𝓮𡚭𓀺ⱱ = "ok";
>𝓮𡚭𓀺ⱱ : "ok"
>"ok" : "ok"

View File

@@ -2,3 +2,37 @@
const 𝑚 = 4;
const 𝑀 = 5;
console.log(𝑀 + 𝑚); // 9
// lower 8 bits look like 'a'
const ၡ = 6;
console.log(ၡ ** ၡ);
// lower 8 bits aren't a valid unicode character
const ဒ = 7;
console.log(ဒ ** ဒ);
// a mix, for good measure
const ဒၡ𝑀 = 7;
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
const ၡ𝑀ဒ = 7;
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
const 𝑀ဒၡ = 7;
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
const Ɐⱱ = "ok"; // BMP
const 𓀸𓀹𓀺 = "ok"; // SMP
const 𡚭𡚮𡚯 = "ok"; // SIP
const 𡚭𓀺ⱱ𝓮 = "ok";
const 𓀺ⱱ𝓮𡚭 = "ok";
const ⱱ𝓮𡚭𓀺 = "ok";
const 𝓮𡚭𓀺ⱱ = "ok";