mirror of
https://github.com/microsoft/TypeScript.git
synced 2026-03-15 14:05:47 -05:00
Adjust isIdentifierText to skip multiple characters when a code point is multiple chars long (#32720)
* Adjust isIdentifierText to skip multiple characters when a code point is multiple chars long * Add a few examples with mixed unicode characters * for posterity, add some unicode cursive script characters * Test some more planes more explicitly
This commit is contained in:
@@ -832,12 +832,13 @@ namespace ts {
|
||||
|
||||
/* @internal */
|
||||
export function isIdentifierText(name: string, languageVersion: ScriptTarget | undefined): boolean {
|
||||
if (!isIdentifierStart(name.charCodeAt(0), languageVersion)) {
|
||||
let ch = codePointAt(name, 0);
|
||||
if (!isIdentifierStart(ch, languageVersion)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (let i = 1; i < name.length; i++) {
|
||||
if (!isIdentifierPart(name.charCodeAt(i), languageVersion)) {
|
||||
for (let i = charSize(ch); i < name.length; i += charSize(ch)) {
|
||||
if (!isIdentifierPart(ch = codePointAt(name, i), languageVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1870,13 +1871,6 @@ namespace ts {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns how many UTF-16 code units are needed to represent the code point `ch`.
 *
 * @param ch A Unicode code point.
 * @returns 2 for supplementary-plane code points (U+10000 and above), otherwise 1.
 */
function charSize(ch: number) {
    // U+10000 is the first code point encoded as a surrogate pair, so the
    // comparison must be inclusive; `>` would report a size of 1 for it.
    if (ch >= 0x10000) {
        return 2;
    }
    return 1;
}
|
||||
|
||||
function reScanGreaterToken(): SyntaxKind {
|
||||
if (token === SyntaxKind.GreaterThanToken) {
|
||||
if (text.charCodeAt(pos) === CharacterCodes.greaterThan) {
|
||||
@@ -2238,4 +2232,12 @@ namespace ts {
|
||||
}
|
||||
return first;
|
||||
};
|
||||
|
||||
/* @internal */
/**
 * Returns how many UTF-16 code units are needed to represent the code point `ch`,
 * so that callers walking a string by code point can advance their index by the
 * right amount.
 *
 * @param ch A Unicode code point.
 * @returns 2 for supplementary-plane code points (U+10000 and above), otherwise 1.
 */
function charSize(ch: number) {
    // U+10000 is the first code point encoded as a surrogate pair, so the
    // comparison must be inclusive; `>` would report a size of 1 for it and
    // leave the caller positioned on the trailing low surrogate.
    if (ch >= 0x10000) {
        return 2;
    }
    return 1;
}
|
||||
}
|
||||
|
||||
@@ -2,9 +2,64 @@
|
||||
const 𝑚 = 4;
|
||||
const 𝑀 = 5;
|
||||
console.log(𝑀 + 𝑚); // 9
|
||||
|
||||
// lower 8 bits look like 'a'
|
||||
const ၡ = 6;
|
||||
console.log(ၡ ** ၡ);
|
||||
|
||||
// lower 8 bits aren't a valid unicode character
|
||||
const ဒ = 7;
|
||||
console.log(ဒ ** ဒ);
|
||||
|
||||
// a mix, for good measure
|
||||
const ဒၡ𝑀 = 7;
|
||||
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
|
||||
|
||||
const ၡ𝑀ဒ = 7;
|
||||
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
|
||||
|
||||
const 𝑀ဒၡ = 7;
|
||||
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
|
||||
|
||||
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
|
||||
|
||||
const Ɐⱱ = "ok"; // BMP
|
||||
|
||||
const 𓀸𓀹𓀺 = "ok"; // SMP
|
||||
|
||||
const 𡚭𡚮𡚯 = "ok"; // SIP
|
||||
|
||||
const 𡚭𓀺ⱱ𝓮 = "ok";
|
||||
|
||||
const 𓀺ⱱ𝓮𡚭 = "ok";
|
||||
|
||||
const ⱱ𝓮𡚭𓀺 = "ok";
|
||||
|
||||
const 𝓮𡚭𓀺ⱱ = "ok";
|
||||
|
||||
|
||||
//// [extendedUnicodePlaneIdentifiers.js]
|
||||
const 𝑚 = 4;
|
||||
const 𝑀 = 5;
|
||||
console.log(𝑀 + 𝑚); // 9
|
||||
// lower 8 bits look like 'a'
|
||||
const ၡ = 6;
|
||||
console.log(ၡ ** ၡ);
|
||||
// lower 8 bits aren't a valid unicode character
|
||||
const ဒ = 7;
|
||||
console.log(ဒ ** ဒ);
|
||||
// a mix, for good measure
|
||||
const ဒၡ𝑀 = 7;
|
||||
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
|
||||
const ၡ𝑀ဒ = 7;
|
||||
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
|
||||
const 𝑀ဒၡ = 7;
|
||||
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
|
||||
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
|
||||
const Ɐⱱ = "ok"; // BMP
|
||||
const 𓀸𓀹𓀺 = "ok"; // SMP
|
||||
const 𡚭𡚮𡚯 = "ok"; // SIP
|
||||
const 𡚭𓀺ⱱ𝓮 = "ok";
|
||||
const 𓀺ⱱ𝓮𡚭 = "ok";
|
||||
const ⱱ𝓮𡚭𓀺 = "ok";
|
||||
const 𝓮𡚭𓀺ⱱ = "ok";
|
||||
|
||||
@@ -12,3 +12,80 @@ console.log(𝑀 + 𝑚); // 9
|
||||
>𝑀 : Symbol(𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 1, 5))
|
||||
>𝑚 : Symbol(𝑚, Decl(extendedUnicodePlaneIdentifiers.ts, 0, 5))
|
||||
|
||||
// lower 8 bits look like 'a'
|
||||
const ၡ = 6;
|
||||
>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5))
|
||||
|
||||
console.log(ၡ ** ၡ);
|
||||
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
|
||||
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5))
|
||||
>ၡ : Symbol(ၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 5, 5))
|
||||
|
||||
// lower 8 bits aren't a valid unicode character
|
||||
const ဒ = 7;
|
||||
>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5))
|
||||
|
||||
console.log(ဒ ** ဒ);
|
||||
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
|
||||
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5))
|
||||
>ဒ : Symbol(ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 9, 5))
|
||||
|
||||
// a mix, for good measure
|
||||
const ဒၡ𝑀 = 7;
|
||||
>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5))
|
||||
|
||||
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
|
||||
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
|
||||
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5))
|
||||
>ဒၡ𝑀 : Symbol(ဒၡ𝑀, Decl(extendedUnicodePlaneIdentifiers.ts, 13, 5))
|
||||
|
||||
const ၡ𝑀ဒ = 7;
|
||||
>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5))
|
||||
|
||||
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
|
||||
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
|
||||
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5))
|
||||
>ၡ𝑀ဒ : Symbol(ၡ𝑀ဒ, Decl(extendedUnicodePlaneIdentifiers.ts, 16, 5))
|
||||
|
||||
const 𝑀ဒၡ = 7;
|
||||
>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5))
|
||||
|
||||
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
|
||||
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
|
||||
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
|
||||
>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5))
|
||||
>𝑀ဒၡ : Symbol(𝑀ဒၡ, Decl(extendedUnicodePlaneIdentifiers.ts, 19, 5))
|
||||
|
||||
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
|
||||
>𝓱𝓮𝓵𝓵𝓸 : Symbol(𝓱𝓮𝓵𝓵𝓸, Decl(extendedUnicodePlaneIdentifiers.ts, 22, 5))
|
||||
|
||||
const Ɐⱱ = "ok"; // BMP
|
||||
>Ɐⱱ : Symbol(Ɐⱱ, Decl(extendedUnicodePlaneIdentifiers.ts, 24, 5))
|
||||
|
||||
const 𓀸𓀹𓀺 = "ok"; // SMP
|
||||
>𓀸𓀹𓀺 : Symbol(𓀸𓀹𓀺, Decl(extendedUnicodePlaneIdentifiers.ts, 26, 5))
|
||||
|
||||
const 𡚭𡚮𡚯 = "ok"; // SIP
|
||||
>𡚭𡚮𡚯 : Symbol(𡚭𡚮𡚯, Decl(extendedUnicodePlaneIdentifiers.ts, 28, 5))
|
||||
|
||||
const 𡚭𓀺ⱱ𝓮 = "ok";
|
||||
>𡚭𓀺ⱱ𝓮 : Symbol(𡚭𓀺ⱱ𝓮, Decl(extendedUnicodePlaneIdentifiers.ts, 30, 5))
|
||||
|
||||
const 𓀺ⱱ𝓮𡚭 = "ok";
|
||||
>𓀺ⱱ𝓮𡚭 : Symbol(𓀺ⱱ𝓮𡚭, Decl(extendedUnicodePlaneIdentifiers.ts, 32, 5))
|
||||
|
||||
const ⱱ𝓮𡚭𓀺 = "ok";
|
||||
>ⱱ𝓮𡚭𓀺 : Symbol(ⱱ𝓮𡚭𓀺, Decl(extendedUnicodePlaneIdentifiers.ts, 34, 5))
|
||||
|
||||
const 𝓮𡚭𓀺ⱱ = "ok";
|
||||
>𝓮𡚭𓀺ⱱ : Symbol(𝓮𡚭𓀺ⱱ, Decl(extendedUnicodePlaneIdentifiers.ts, 36, 5))
|
||||
|
||||
|
||||
@@ -16,3 +16,103 @@ console.log(𝑀 + 𝑚); // 9
|
||||
>𝑀 : 5
|
||||
>𝑚 : 4
|
||||
|
||||
// lower 8 bits look like 'a'
|
||||
const ၡ = 6;
|
||||
>ၡ : 6
|
||||
>6 : 6
|
||||
|
||||
console.log(ၡ ** ၡ);
|
||||
>console.log(ၡ ** ၡ) : void
|
||||
>console.log : (message?: any, ...optionalParams: any[]) => void
|
||||
>console : Console
|
||||
>log : (message?: any, ...optionalParams: any[]) => void
|
||||
>ၡ ** ၡ : number
|
||||
>ၡ : 6
|
||||
>ၡ : 6
|
||||
|
||||
// lower 8 bits aren't a valid unicode character
|
||||
const ဒ = 7;
|
||||
>ဒ : 7
|
||||
>7 : 7
|
||||
|
||||
console.log(ဒ ** ဒ);
|
||||
>console.log(ဒ ** ဒ) : void
|
||||
>console.log : (message?: any, ...optionalParams: any[]) => void
|
||||
>console : Console
|
||||
>log : (message?: any, ...optionalParams: any[]) => void
|
||||
>ဒ ** ဒ : number
|
||||
>ဒ : 7
|
||||
>ဒ : 7
|
||||
|
||||
// a mix, for good measure
|
||||
const ဒၡ𝑀 = 7;
|
||||
>ဒၡ𝑀 : 7
|
||||
>7 : 7
|
||||
|
||||
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
|
||||
>console.log(ဒၡ𝑀 ** ဒၡ𝑀) : void
|
||||
>console.log : (message?: any, ...optionalParams: any[]) => void
|
||||
>console : Console
|
||||
>log : (message?: any, ...optionalParams: any[]) => void
|
||||
>ဒၡ𝑀 ** ဒၡ𝑀 : number
|
||||
>ဒၡ𝑀 : 7
|
||||
>ဒၡ𝑀 : 7
|
||||
|
||||
const ၡ𝑀ဒ = 7;
|
||||
>ၡ𝑀ဒ : 7
|
||||
>7 : 7
|
||||
|
||||
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
|
||||
>console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ) : void
|
||||
>console.log : (message?: any, ...optionalParams: any[]) => void
|
||||
>console : Console
|
||||
>log : (message?: any, ...optionalParams: any[]) => void
|
||||
>ၡ𝑀ဒ ** ၡ𝑀ဒ : number
|
||||
>ၡ𝑀ဒ : 7
|
||||
>ၡ𝑀ဒ : 7
|
||||
|
||||
const 𝑀ဒၡ = 7;
|
||||
>𝑀ဒၡ : 7
|
||||
>7 : 7
|
||||
|
||||
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
|
||||
>console.log(𝑀ဒၡ ** 𝑀ဒၡ) : void
|
||||
>console.log : (message?: any, ...optionalParams: any[]) => void
|
||||
>console : Console
|
||||
>log : (message?: any, ...optionalParams: any[]) => void
|
||||
>𝑀ဒၡ ** 𝑀ဒၡ : number
|
||||
>𝑀ဒၡ : 7
|
||||
>𝑀ဒၡ : 7
|
||||
|
||||
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
|
||||
>𝓱𝓮𝓵𝓵𝓸 : "𝔀𝓸𝓻𝓵𝓭"
|
||||
>"𝔀𝓸𝓻𝓵𝓭" : "𝔀𝓸𝓻𝓵𝓭"
|
||||
|
||||
const Ɐⱱ = "ok"; // BMP
|
||||
>Ɐⱱ : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
const 𓀸𓀹𓀺 = "ok"; // SMP
|
||||
>𓀸𓀹𓀺 : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
const 𡚭𡚮𡚯 = "ok"; // SIP
|
||||
>𡚭𡚮𡚯 : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
const 𡚭𓀺ⱱ𝓮 = "ok";
|
||||
>𡚭𓀺ⱱ𝓮 : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
const 𓀺ⱱ𝓮𡚭 = "ok";
|
||||
>𓀺ⱱ𝓮𡚭 : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
const ⱱ𝓮𡚭𓀺 = "ok";
|
||||
>ⱱ𝓮𡚭𓀺 : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
const 𝓮𡚭𓀺ⱱ = "ok";
|
||||
>𝓮𡚭𓀺ⱱ : "ok"
|
||||
>"ok" : "ok"
|
||||
|
||||
|
||||
@@ -2,3 +2,37 @@
|
||||
const 𝑚 = 4;
|
||||
const 𝑀 = 5;
|
||||
console.log(𝑀 + 𝑚); // 9
|
||||
|
||||
// lower 8 bits look like 'a'
|
||||
const ၡ = 6;
|
||||
console.log(ၡ ** ၡ);
|
||||
|
||||
// lower 8 bits aren't a valid unicode character
|
||||
const ဒ = 7;
|
||||
console.log(ဒ ** ဒ);
|
||||
|
||||
// a mix, for good measure
|
||||
const ဒၡ𝑀 = 7;
|
||||
console.log(ဒၡ𝑀 ** ဒၡ𝑀);
|
||||
|
||||
const ၡ𝑀ဒ = 7;
|
||||
console.log(ၡ𝑀ဒ ** ၡ𝑀ဒ);
|
||||
|
||||
const 𝑀ဒၡ = 7;
|
||||
console.log(𝑀ဒၡ ** 𝑀ဒၡ);
|
||||
|
||||
const 𝓱𝓮𝓵𝓵𝓸 = "𝔀𝓸𝓻𝓵𝓭";
|
||||
|
||||
const Ɐⱱ = "ok"; // BMP
|
||||
|
||||
const 𓀸𓀹𓀺 = "ok"; // SMP
|
||||
|
||||
const 𡚭𡚮𡚯 = "ok"; // SIP
|
||||
|
||||
const 𡚭𓀺ⱱ𝓮 = "ok";
|
||||
|
||||
const 𓀺ⱱ𝓮𡚭 = "ok";
|
||||
|
||||
const ⱱ𝓮𡚭𓀺 = "ok";
|
||||
|
||||
const 𝓮𡚭𓀺ⱱ = "ok";
|
||||
|
||||
Reference in New Issue
Block a user