mirror of
https://github.com/microsoft/TypeScript.git
synced 2026-02-20 19:45:07 -06:00
Correct regular expression flags scanning for non-BMP characters (#58612)
Co-authored-by: Ron Buckton <ron.buckton@microsoft.com>
This commit is contained in:
parent
8d62e2f175
commit
dc1ffb1648
@ -282,16 +282,16 @@ const textToToken = new Map(Object.entries({
|
||||
"`": SyntaxKind.BacktickToken,
|
||||
}));
|
||||
|
||||
const charToRegExpFlag = new Map(Object.entries({
|
||||
d: RegularExpressionFlags.HasIndices,
|
||||
g: RegularExpressionFlags.Global,
|
||||
i: RegularExpressionFlags.IgnoreCase,
|
||||
m: RegularExpressionFlags.Multiline,
|
||||
s: RegularExpressionFlags.DotAll,
|
||||
u: RegularExpressionFlags.Unicode,
|
||||
v: RegularExpressionFlags.UnicodeSets,
|
||||
y: RegularExpressionFlags.Sticky,
|
||||
}));
|
||||
const charCodeToRegExpFlag = new Map<CharacterCodes, RegularExpressionFlags>([
|
||||
[CharacterCodes.d, RegularExpressionFlags.HasIndices],
|
||||
[CharacterCodes.g, RegularExpressionFlags.Global],
|
||||
[CharacterCodes.i, RegularExpressionFlags.IgnoreCase],
|
||||
[CharacterCodes.m, RegularExpressionFlags.Multiline],
|
||||
[CharacterCodes.s, RegularExpressionFlags.DotAll],
|
||||
[CharacterCodes.u, RegularExpressionFlags.Unicode],
|
||||
[CharacterCodes.v, RegularExpressionFlags.UnicodeSets],
|
||||
[CharacterCodes.y, RegularExpressionFlags.Sticky],
|
||||
]);
|
||||
|
||||
const regExpFlagToFirstAvailableLanguageVersion = new Map<RegularExpressionFlags, LanguageFeatureMinimumTarget>([
|
||||
[RegularExpressionFlags.HasIndices, LanguageFeatureMinimumTarget.RegularExpressionFlagsHasIndices],
|
||||
@ -394,8 +394,8 @@ function isUnicodeIdentifierPart(code: number, languageVersion: ScriptTarget | u
|
||||
lookupInUnicodeMap(code, unicodeES5IdentifierPart);
|
||||
}
|
||||
|
||||
function makeReverseMap(source: Map<string, number>): string[] {
|
||||
const result: string[] = [];
|
||||
function makeReverseMap<T>(source: Map<T, number>): T[] {
|
||||
const result: T[] = [];
|
||||
source.forEach((value, name) => {
|
||||
result[value] = name;
|
||||
});
|
||||
@ -416,16 +416,16 @@ export function stringToToken(s: string): SyntaxKind | undefined {
|
||||
return textToToken.get(s);
|
||||
}
|
||||
|
||||
const regExpFlagChars = makeReverseMap(charToRegExpFlag);
|
||||
const regExpFlagCharCodes = makeReverseMap(charCodeToRegExpFlag);
|
||||
|
||||
/** @internal */
|
||||
export function regularExpressionFlagToCharacter(f: RegularExpressionFlags): string | undefined {
|
||||
return regExpFlagChars[f];
|
||||
export function regularExpressionFlagToCharacterCode(f: RegularExpressionFlags): CharacterCodes | undefined {
|
||||
return regExpFlagCharCodes[f];
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export function characterToRegularExpressionFlag(c: string): RegularExpressionFlags | undefined {
|
||||
return charToRegExpFlag.get(c);
|
||||
export function characterCodeToRegularExpressionFlag(ch: CharacterCodes): RegularExpressionFlags | undefined {
|
||||
return charCodeToRegExpFlag.get(ch);
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
@ -2558,27 +2558,28 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
|
||||
pos++;
|
||||
let regExpFlags = RegularExpressionFlags.None;
|
||||
while (true) {
|
||||
const ch = charCodeChecked(pos);
|
||||
const ch = codePointChecked(pos);
|
||||
if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) {
|
||||
break;
|
||||
}
|
||||
const size = charSize(ch);
|
||||
if (reportErrors) {
|
||||
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
|
||||
const flag = characterCodeToRegularExpressionFlag(ch);
|
||||
if (flag === undefined) {
|
||||
error(Diagnostics.Unknown_regular_expression_flag, pos, 1);
|
||||
error(Diagnostics.Unknown_regular_expression_flag, pos, size);
|
||||
}
|
||||
else if (regExpFlags & flag) {
|
||||
error(Diagnostics.Duplicate_regular_expression_flag, pos, 1);
|
||||
error(Diagnostics.Duplicate_regular_expression_flag, pos, size);
|
||||
}
|
||||
else if (((regExpFlags | flag) & RegularExpressionFlags.AnyUnicodeMode) === RegularExpressionFlags.AnyUnicodeMode) {
|
||||
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1);
|
||||
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, size);
|
||||
}
|
||||
else {
|
||||
regExpFlags |= flag;
|
||||
checkRegularExpressionFlagAvailable(flag, pos);
|
||||
checkRegularExpressionFlagAvailability(flag, size);
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
pos += size;
|
||||
}
|
||||
if (reportErrors) {
|
||||
scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => {
|
||||
@ -2843,25 +2844,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
|
||||
|
||||
function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags {
|
||||
while (true) {
|
||||
const ch = charCodeChecked(pos);
|
||||
const ch = codePointChecked(pos);
|
||||
if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) {
|
||||
break;
|
||||
}
|
||||
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
|
||||
const size = charSize(ch);
|
||||
const flag = characterCodeToRegularExpressionFlag(ch);
|
||||
if (flag === undefined) {
|
||||
error(Diagnostics.Unknown_regular_expression_flag, pos, 1);
|
||||
error(Diagnostics.Unknown_regular_expression_flag, pos, size);
|
||||
}
|
||||
else if (currFlags & flag) {
|
||||
error(Diagnostics.Duplicate_regular_expression_flag, pos, 1);
|
||||
error(Diagnostics.Duplicate_regular_expression_flag, pos, size);
|
||||
}
|
||||
else if (!(flag & RegularExpressionFlags.Modifiers)) {
|
||||
error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1);
|
||||
error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, size);
|
||||
}
|
||||
else {
|
||||
currFlags |= flag;
|
||||
checkRegularExpressionFlagAvailable(flag, pos);
|
||||
checkRegularExpressionFlagAvailability(flag, size);
|
||||
}
|
||||
pos++;
|
||||
pos += size;
|
||||
}
|
||||
return currFlags;
|
||||
}
|
||||
@ -3583,10 +3585,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
|
||||
});
|
||||
}
|
||||
|
||||
function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) {
|
||||
function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags, size: number) {
|
||||
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined;
|
||||
if (availableFrom && languageVersion < availableFrom) {
|
||||
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom));
|
||||
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, size, getNameOfScriptTarget(availableFrom));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
regularExpressionWithNonBMPFlags.ts(7,23): error TS1499: Unknown regular expression flag.
|
||||
regularExpressionWithNonBMPFlags.ts(7,25): error TS1499: Unknown regular expression flag.
|
||||
regularExpressionWithNonBMPFlags.ts(7,28): error TS1499: Unknown regular expression flag.
|
||||
regularExpressionWithNonBMPFlags.ts(7,41): error TS1499: Unknown regular expression flag.
|
||||
regularExpressionWithNonBMPFlags.ts(7,43): error TS1499: Unknown regular expression flag.
|
||||
regularExpressionWithNonBMPFlags.ts(7,45): error TS1499: Unknown regular expression flag.
|
||||
|
||||
|
||||
==== regularExpressionWithNonBMPFlags.ts (6 errors) ====
|
||||
// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including:
|
||||
// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S)
|
||||
// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I)
|
||||
// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M)
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
|
||||
const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;
|
||||
~~
|
||||
!!! error TS1499: Unknown regular expression flag.
|
||||
~~
|
||||
!!! error TS1499: Unknown regular expression flag.
|
||||
~~
|
||||
!!! error TS1499: Unknown regular expression flag.
|
||||
~~
|
||||
!!! error TS1499: Unknown regular expression flag.
|
||||
~~
|
||||
!!! error TS1499: Unknown regular expression flag.
|
||||
~~
|
||||
!!! error TS1499: Unknown regular expression flag.
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] ////
|
||||
|
||||
//// [regularExpressionWithNonBMPFlags.ts]
|
||||
// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including:
|
||||
// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S)
|
||||
// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I)
|
||||
// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M)
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
|
||||
const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;
|
||||
|
||||
|
||||
//// [regularExpressionWithNonBMPFlags.js]
|
||||
// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including:
|
||||
// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S)
|
||||
// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I)
|
||||
// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M)
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
|
||||
const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;
|
||||
@ -0,0 +1,12 @@
|
||||
//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] ////
|
||||
|
||||
=== regularExpressionWithNonBMPFlags.ts ===
|
||||
// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including:
|
||||
// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S)
|
||||
// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I)
|
||||
// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M)
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
|
||||
const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;
|
||||
>𝘳𝘦𝘨𝘦𝘹 : Symbol(𝘳𝘦𝘨𝘦𝘹, Decl(regularExpressionWithNonBMPFlags.ts, 6, 5))
|
||||
|
||||
@ -0,0 +1,15 @@
|
||||
//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] ////
|
||||
|
||||
=== regularExpressionWithNonBMPFlags.ts ===
|
||||
// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including:
|
||||
// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S)
|
||||
// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I)
|
||||
// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M)
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
|
||||
const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;
|
||||
>𝘳𝘦𝘨𝘦𝘹 : RegExp
|
||||
> : ^^^^^^
|
||||
>/(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶 : RegExp
|
||||
> : ^^^^^^
|
||||
|
||||
9
tests/cases/compiler/regularExpressionWithNonBMPFlags.ts
Normal file
9
tests/cases/compiler/regularExpressionWithNonBMPFlags.ts
Normal file
@ -0,0 +1,9 @@
|
||||
// @target: esnext
|
||||
|
||||
// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including:
|
||||
// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S)
|
||||
// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I)
|
||||
// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M)
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
|
||||
const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;
|
||||
Loading…
x
Reference in New Issue
Block a user