diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index d633e4926b2..7eaca34a212 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -832,12 +832,13 @@ namespace ts { /* @internal */ export function isIdentifierText(name: string, languageVersion: ScriptTarget | undefined): boolean { - if (!isIdentifierStart(name.charCodeAt(0), languageVersion)) { + let ch = codePointAt(name, 0); + if (!isIdentifierStart(ch, languageVersion)) { return false; } - for (let i = 1; i < name.length; i++) { - if (!isIdentifierPart(name.charCodeAt(i), languageVersion)) { + for (let i = charSize(ch); i < name.length; i += charSize(ch)) { + if (!isIdentifierPart(ch = codePointAt(name, i), languageVersion)) { return false; } } @@ -1870,13 +1871,6 @@ namespace ts { } } - function charSize(ch: number) { - if (ch > 0x10000) { - return 2; - } - return 1; - } - function reScanGreaterToken(): SyntaxKind { if (token === SyntaxKind.GreaterThanToken) { if (text.charCodeAt(pos) === CharacterCodes.greaterThan) { @@ -2238,4 +2232,12 @@ namespace ts { } return first; }; + + /* @internal */ + function charSize(ch: number) { + if (ch >= 0x10000) { /* U+10000 itself is the first code point that needs a surrogate pair, i.e. two UTF-16 code units */ + return 2; + } + return 1; + } } diff --git a/tests/baselines/reference/extendedUnicodePlaneIdentifiers.js b/tests/baselines/reference/extendedUnicodePlaneIdentifiers.js index 73271b8b671..7a5a5d9cc04 100644 --- a/tests/baselines/reference/extendedUnicodePlaneIdentifiers.js +++ b/tests/baselines/reference/extendedUnicodePlaneIdentifiers.js @@ -2,9 +2,64 @@ const 𝑚 = 4; const 𝑀 = 5; console.log(𝑀 + 𝑚); // 9 + +// lower 8 bits look like 'a' +const ၡ = 6; +console.log(ၡ ** ၡ); + +// lower 8 bits aren't a valid unicode character +const ဒ = 7; +console.log(ဒ ** ဒ); + +// a mix, for good measure +const ဒၡ𝑀 = 7; +console.log(ဒၡ𝑀 ** ဒၡ𝑀); + +const ၡ𝑀ဒ = 7; +console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ); + +const 𝑀ဒၡ = 7; +console.log(𝑀ဒၡ ** 𝑀ဒၡ); + +const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭"; + +const Ɐⱱ = "ok"; // BMP + +const 𓀸𓀹𓀺 = "ok"; // SMP + +const 𡚭𡚮𡚯 = "ok"; // SIP + +const 
𡚭𓀺ⱱ𝓮 = "ok"; + +const 𓀺ⱱ𝓮𡚭 = "ok"; + +const ⱱ𝓮𡚭𓀺 = "ok"; + +const 𝓮𡚭𓀺ⱱ = "ok"; //// [extendedUnicodePlaneIdentifiers.js] const 𝑚 = 4; const 𝑀 = 5; console.log(𝑀 + 𝑚); // 9 +// lower 8 bits look like 'a' +const ၡ = 6; +console.log(ၡ ** ၡ); +// lower 8 bits aren't a valid unicode character +const ဒ = 7; +console.log(ဒ ** ဒ); +// a mix, for good measure +const ဒၡ𝑀 = 7; +console.log(ဒၡ𝑀 ** ဒၡ𝑀); +const ၡ𝑀ဒ = 7; +console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ); +const 𝑀ဒၡ = 7; +console.log(𝑀ဒၡ ** 𝑀ဒၡ); +const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭"; +const Ɐⱱ = "ok"; // BMP +const 𓀸𓀹𓀺 = "ok"; // SMP +const 𡚭𡚮𡚯 = "ok"; // SIP +const 𡚭𓀺ⱱ𝓮 = "ok"; +const 𓀺ⱱ𝓮𡚭 = "ok"; +const ⱱ𝓮𡚭𓀺 = "ok"; +const 𝓮𡚭𓀺ⱱ = "ok"; diff --git a/tests/baselines/reference/extendedUnicodePlaneIdentifiers.symbols b/tests/baselines/reference/extendedUnicodePlaneIdentifiers.symbols index 9d78bfaee05..170b82da7e9 100644 --- a/tests/baselines/reference/extendedUnicodePlaneIdentifiers.symbols +++ b/tests/baselines/reference/extendedUnicodePlaneIdentifiers.symbols @@ -12,3 +12,80 @@ console.log(𝑀 + 𝑚); // 9 >𝑀 : Symbol(𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 1, 5)) >𝑚 : Symbol(𝑚, Decl(extendedUnicodePlaneIdentifiers.ts, 0, 5)) +// lower 8 bits look like 'a' +const ၡ = 6; +>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5)) + +console.log(ၡ ** ၡ); +>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>console : Symbol(console, Decl(lib.dom.d.ts, --, --)) +>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5)) +>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5)) + +// lower 8 bits aren't a valid unicode character +const ဒ = 7; +>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5)) + +console.log(ဒ ** ဒ); +>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>console : Symbol(console, Decl(lib.dom.d.ts, --, --)) +>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5)) +>ဒ : 
Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5)) + +// a mix, for good measure +const ဒၡ𝑀 = 7; +>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5)) + +console.log(ဒၡ𝑀 ** ဒၡ𝑀); +>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>console : Symbol(console, Decl(lib.dom.d.ts, --, --)) +>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5)) +>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5)) + +const ၡ𝑀ဒ = 7; +>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5)) + +console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ); +>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>console : Symbol(console, Decl(lib.dom.d.ts, --, --)) +>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5)) +>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5)) + +const 𝑀ဒၡ = 7; +>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5)) + +console.log(𝑀ဒၡ ** 𝑀ဒၡ); +>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>console : Symbol(console, Decl(lib.dom.d.ts, --, --)) +>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --)) +>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5)) +>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5)) + +const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭"; +>𝓱𝓮𝓵𝓵𝓸 : Symbol(𝓱𝓮𝓵𝓵𝓸, Decl(extendedUnicodePlaneIdentifiers.ts, 22, 5)) + +const Ɐⱱ = "ok"; // BMP +>Ɐⱱ : Symbol(Ɐⱱ, Decl(extendedUnicodePlaneIdentifiers.ts, 24, 5)) + +const 𓀸𓀹𓀺 = "ok"; // SMP +>𓀸𓀹𓀺 : Symbol(𓀸𓀹𓀺, Decl(extendedUnicodePlaneIdentifiers.ts, 26, 5)) + +const 𡚭𡚮𡚯 = "ok"; // SIP +>𡚭𡚮𡚯 : Symbol(𡚭𡚮𡚯, Decl(extendedUnicodePlaneIdentifiers.ts, 28, 5)) + +const 𡚭𓀺ⱱ𝓮 = "ok"; +>𡚭𓀺ⱱ𝓮 : Symbol(𡚭𓀺ⱱ𝓮, Decl(extendedUnicodePlaneIdentifiers.ts, 30, 5)) + +const 𓀺ⱱ𝓮𡚭 = "ok"; +>𓀺ⱱ𝓮𡚭 : Symbol(𓀺ⱱ𝓮𡚭, Decl(extendedUnicodePlaneIdentifiers.ts, 32, 5)) + +const ⱱ𝓮𡚭𓀺 = "ok"; +>ⱱ𝓮𡚭𓀺 : Symbol(ⱱ𝓮𡚭𓀺, 
Decl(extendedUnicodePlaneIdentifiers.ts, 34, 5)) + +const 𝓮𡚭𓀺ⱱ = "ok"; +>𝓮𡚭𓀺ⱱ : Symbol(𝓮𡚭𓀺ⱱ, Decl(extendedUnicodePlaneIdentifiers.ts, 36, 5)) + diff --git a/tests/baselines/reference/extendedUnicodePlaneIdentifiers.types b/tests/baselines/reference/extendedUnicodePlaneIdentifiers.types index 5de90b68b6d..4ee200dd5d3 100644 --- a/tests/baselines/reference/extendedUnicodePlaneIdentifiers.types +++ b/tests/baselines/reference/extendedUnicodePlaneIdentifiers.types @@ -16,3 +16,103 @@ console.log(𝑀 + 𝑚); // 9 >𝑀 : 5 >𝑚 : 4 +// lower 8 bits look like 'a' +const ၡ = 6; +>ၡ : 6 +>6 : 6 + +console.log(ၡ ** ၡ); +>console.log(ၡ ** ၡ) : void +>console.log : (message?: any, ...optionalParams: any[]) => void +>console : Console +>log : (message?: any, ...optionalParams: any[]) => void +>ၡ ** ၡ : number +>ၡ : 6 +>ၡ : 6 + +// lower 8 bits aren't a valid unicode character +const ဒ = 7; +>ဒ : 7 +>7 : 7 + +console.log(ဒ ** ဒ); +>console.log(ဒ ** ဒ) : void +>console.log : (message?: any, ...optionalParams: any[]) => void +>console : Console +>log : (message?: any, ...optionalParams: any[]) => void +>ဒ ** ဒ : number +>ဒ : 7 +>ဒ : 7 + +// a mix, for good measure +const ဒၡ𝑀 = 7; +>ဒၡ𝑀 : 7 +>7 : 7 + +console.log(ဒၡ𝑀 ** ဒၡ𝑀); +>console.log(ဒၡ𝑀 ** ဒၡ𝑀) : void +>console.log : (message?: any, ...optionalParams: any[]) => void +>console : Console +>log : (message?: any, ...optionalParams: any[]) => void +>ဒၡ𝑀 ** ဒၡ𝑀 : number +>ဒၡ𝑀 : 7 +>ဒၡ𝑀 : 7 + +const ၡ𝑀ဒ = 7; +>ၡ𝑀ဒ : 7 +>7 : 7 + +console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ); +>console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ) : void +>console.log : (message?: any, ...optionalParams: any[]) => void +>console : Console +>log : (message?: any, ...optionalParams: any[]) => void +>ၡ𝑀ဒ ** ၡ𝑀ဒ : number +>ၡ𝑀ဒ : 7 +>ၡ𝑀ဒ : 7 + +const 𝑀ဒၡ = 7; +>𝑀ဒၡ : 7 +>7 : 7 + +console.log(𝑀ဒၡ ** 𝑀ဒၡ); +>console.log(𝑀ဒၡ ** 𝑀ဒၡ) : void +>console.log : (message?: any, ...optionalParams: any[]) => void +>console : Console +>log : (message?: any, ...optionalParams: any[]) => void +>𝑀ဒၡ ** 𝑀ဒၡ : number +>𝑀ဒၡ : 7 
+>𝑀ဒၡ : 7 + +const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭"; +>𝓱𝓮𝓵𝓵𝓸 : "𝔀𝓸𝓻𝓵𝓭" +>"𝔀𝓸𝓻𝓵𝓭" : "𝔀𝓸𝓻𝓵𝓭" + +const Ɐⱱ = "ok"; // BMP +>Ɐⱱ : "ok" +>"ok" : "ok" + +const 𓀸𓀹𓀺 = "ok"; // SMP +>𓀸𓀹𓀺 : "ok" +>"ok" : "ok" + +const 𡚭𡚮𡚯 = "ok"; // SIP +>𡚭𡚮𡚯 : "ok" +>"ok" : "ok" + +const 𡚭𓀺ⱱ𝓮 = "ok"; +>𡚭𓀺ⱱ𝓮 : "ok" +>"ok" : "ok" + +const 𓀺ⱱ𝓮𡚭 = "ok"; +>𓀺ⱱ𝓮𡚭 : "ok" +>"ok" : "ok" + +const ⱱ𝓮𡚭𓀺 = "ok"; +>ⱱ𝓮𡚭𓀺 : "ok" +>"ok" : "ok" + +const 𝓮𡚭𓀺ⱱ = "ok"; +>𝓮𡚭𓀺ⱱ : "ok" +>"ok" : "ok" + diff --git a/tests/cases/compiler/extendedUnicodePlaneIdentifiers.ts b/tests/cases/compiler/extendedUnicodePlaneIdentifiers.ts index f207571549c..9210ee3b7c5 100644 --- a/tests/cases/compiler/extendedUnicodePlaneIdentifiers.ts +++ b/tests/cases/compiler/extendedUnicodePlaneIdentifiers.ts @@ -2,3 +2,37 @@ const 𝑚 = 4; const 𝑀 = 5; console.log(𝑀 + 𝑚); // 9 + +// lower 8 bits look like 'a' +const ၡ = 6; +console.log(ၡ ** ၡ); + +// lower 8 bits aren't a valid unicode character +const ဒ = 7; +console.log(ဒ ** ဒ); + +// a mix, for good measure +const ဒၡ𝑀 = 7; +console.log(ဒၡ𝑀 ** ဒၡ𝑀); + +const ၡ𝑀ဒ = 7; +console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ); + +const 𝑀ဒၡ = 7; +console.log(𝑀ဒၡ ** 𝑀ဒၡ); + +const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭"; + +const Ɐⱱ = "ok"; // BMP + +const 𓀸𓀹𓀺 = "ok"; // SMP + +const 𡚭𡚮𡚯 = "ok"; // SIP + +const 𡚭𓀺ⱱ𝓮 = "ok"; + +const 𓀺ⱱ𝓮𡚭 = "ok"; + +const ⱱ𝓮𡚭𓀺 = "ok"; + +const 𝓮𡚭𓀺ⱱ = "ok";