Fix: False Positive "Range out of order in character class" in Regular Expressions in Unicode Modes (#58982)

This commit is contained in:
graphemecluster 2024-07-18 03:29:23 +08:00 committed by GitHub
parent 369f2b0fb8
commit e13ff2f26f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 203 additions and 2 deletions

View File

@ -1640,7 +1640,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
const nextStart = pos;
let nextPos = pos + 2;
for (; nextPos < nextStart + 6; nextPos++) {
if (!isHexDigit(charCodeUnchecked(pos))) {
if (!isHexDigit(charCodeUnchecked(nextPos))) {
// leave the error to the next call
return escapedValueString;
}
@ -3552,7 +3552,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
/**
 * Consumes one source character starting at `pos` and returns its text.
 *
 * The width to consume is 1 unless `anyUnicodeMode` is set, in which case it is
 * whatever `charSize` reports for the value read at `pos`. `pos` is advanced by
 * that width, and the consumed slice of `text` is returned. When the computed
 * width is not positive, `pos` is left where it was and "" is returned.
 */
function scanSourceCharacter(): string {
// NOTE(review): this line and the next both declare `size`; in this diff view they
// appear to be the removed (charCodeChecked) and added (codePointChecked) versions of
// the same statement. Confirm that only one of them exists in the actual file.
const size = anyUnicodeMode ? charSize(charCodeChecked(pos)) : 1;
// Reads through codePointChecked instead of charCodeChecked before asking charSize for
// the width. Presumably this lets a character outside the BMP be measured as a whole
// code point rather than as its leading surrogate alone (TODO confirm against charSize).
const size = anyUnicodeMode ? charSize(codePointChecked(pos)) : 1;
pos += size;
// `pos` has already moved, so the consumed character spans [pos - size, pos).
return size > 0 ? text.substring(pos - size, pos) : "";
}

View File

@ -0,0 +1,64 @@
regularExpressionCharacterClassRangeOrder.ts(7,5): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(7,12): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(8,11): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(9,11): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(11,6): error TS1125: Hexadecimal digit expected.
regularExpressionCharacterClassRangeOrder.ts(11,16): error TS1125: Hexadecimal digit expected.
regularExpressionCharacterClassRangeOrder.ts(11,27): error TS1125: Hexadecimal digit expected.
regularExpressionCharacterClassRangeOrder.ts(11,37): error TS1125: Hexadecimal digit expected.
regularExpressionCharacterClassRangeOrder.ts(12,25): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(13,25): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(15,10): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(15,37): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(16,31): error TS1517: Range out of order in character class.
regularExpressionCharacterClassRangeOrder.ts(17,31): error TS1517: Range out of order in character class.
==== regularExpressionCharacterClassRangeOrder.ts (14 errors) ====
// The characters in the following regular expressions are ASCII-lookalike characters found in Unicode, including:
// - 𝘈 (U+1D608 Mathematical Sans-Serif Italic Capital A)
// - 𝘡 (U+1D621 Mathematical Sans-Serif Italic Capital Z)
//
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
const regexes: RegExp[] = [
/[𝘈-𝘡][𝘡-𝘈]/,
~~~
!!! error TS1517: Range out of order in character class.
~~~
!!! error TS1517: Range out of order in character class.
/[𝘈-𝘡][𝘡-𝘈]/u,
~~~~~
!!! error TS1517: Range out of order in character class.
/[𝘈-𝘡][𝘡-𝘈]/v,
~~~~~
!!! error TS1517: Range out of order in character class.
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/,
!!! error TS1125: Hexadecimal digit expected.
!!! error TS1125: Hexadecimal digit expected.
!!! error TS1125: Hexadecimal digit expected.
!!! error TS1125: Hexadecimal digit expected.
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u,
~~~~~~~~~~~~~~~~~~~
!!! error TS1517: Range out of order in character class.
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v,
~~~~~~~~~~~~~~~~~~~
!!! error TS1517: Range out of order in character class.
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/,
~~~~~~~~~~~~~
!!! error TS1517: Range out of order in character class.
~~~~~~~~~~~~~
!!! error TS1517: Range out of order in character class.
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u,
~~~~~~~~~~~~~~~~~~~~~~~~~
!!! error TS1517: Range out of order in character class.
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,
~~~~~~~~~~~~~~~~~~~~~~~~~
!!! error TS1517: Range out of order in character class.
];

View File

@ -0,0 +1,40 @@
//// [tests/cases/compiler/regularExpressionCharacterClassRangeOrder.ts] ////
//// [regularExpressionCharacterClassRangeOrder.ts]
// The characters in the following regular expressions are ASCII-lookalike characters found in Unicode, including:
// - 𝘈 (U+1D608 Mathematical Sans-Serif Italic Capital A)
// - 𝘡 (U+1D621 Mathematical Sans-Serif Italic Capital Z)
//
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
const regexes: RegExp[] = [
/[𝘈-𝘡][𝘡-𝘈]/,
/[𝘈-𝘡][𝘡-𝘈]/u,
/[𝘈-𝘡][𝘡-𝘈]/v,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,
];
//// [regularExpressionCharacterClassRangeOrder.js]
// The characters in the following regular expressions are ASCII-lookalike characters found in Unicode, including:
// - 𝘈 (U+1D608 Mathematical Sans-Serif Italic Capital A)
// - 𝘡 (U+1D621 Mathematical Sans-Serif Italic Capital Z)
//
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
const regexes = [
/[𝘈-𝘡][𝘡-𝘈]/,
/[𝘈-𝘡][𝘡-𝘈]/u,
/[𝘈-𝘡][𝘡-𝘈]/v,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,
];

View File

@ -0,0 +1,25 @@
//// [tests/cases/compiler/regularExpressionCharacterClassRangeOrder.ts] ////
=== regularExpressionCharacterClassRangeOrder.ts ===
// The characters in the following regular expressions are ASCII-lookalike characters found in Unicode, including:
// - 𝘈 (U+1D608 Mathematical Sans-Serif Italic Capital A)
// - 𝘡 (U+1D621 Mathematical Sans-Serif Italic Capital Z)
//
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
const regexes: RegExp[] = [
>regexes : Symbol(regexes, Decl(regularExpressionCharacterClassRangeOrder.ts, 5, 5))
>RegExp : Symbol(RegExp, Decl(lib.es5.d.ts, --, --), Decl(lib.es5.d.ts, --, --), Decl(lib.es2015.core.d.ts, --, --), Decl(lib.es2015.symbol.wellknown.d.ts, --, --), Decl(lib.es2018.regexp.d.ts, --, --) ... and 3 more)
/[𝘈-𝘡][𝘡-𝘈]/,
/[𝘈-𝘡][𝘡-𝘈]/u,
/[𝘈-𝘡][𝘡-𝘈]/v,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,
];

View File

@ -0,0 +1,52 @@
//// [tests/cases/compiler/regularExpressionCharacterClassRangeOrder.ts] ////
=== regularExpressionCharacterClassRangeOrder.ts ===
// The characters in the following regular expressions are ASCII-lookalike characters found in Unicode, including:
// - 𝘈 (U+1D608 Mathematical Sans-Serif Italic Capital A)
// - 𝘡 (U+1D621 Mathematical Sans-Serif Italic Capital Z)
//
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
const regexes: RegExp[] = [
>regexes : RegExp[]
> : ^^^^^^^^
>[ /[𝘈-𝘡][𝘡-𝘈]/, /[𝘈-𝘡][𝘡-𝘈]/u, /[𝘈-𝘡][𝘡-𝘈]/v, /[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/, /[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u, /[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v, /[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/, /[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u, /[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,] : RegExp[]
> : ^^^^^^^^
/[𝘈-𝘡][𝘡-𝘈]/,
>/[𝘈-𝘡][𝘡-𝘈]/ : RegExp
> : ^^^^^^
/[𝘈-𝘡][𝘡-𝘈]/u,
>/[𝘈-𝘡][𝘡-𝘈]/u : RegExp
> : ^^^^^^
/[𝘈-𝘡][𝘡-𝘈]/v,
>/[𝘈-𝘡][𝘡-𝘈]/v : RegExp
> : ^^^^^^
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/,
>/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/ : RegExp
> : ^^^^^^
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u,
>/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u : RegExp
> : ^^^^^^
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v,
>/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v : RegExp
> : ^^^^^^
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/,
>/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/ : RegExp
> : ^^^^^^
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u,
>/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u : RegExp
> : ^^^^^^
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,
>/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v : RegExp
> : ^^^^^^
];

View File

@ -0,0 +1,20 @@
// @target: esnext
// The characters in the following regular expressions are ASCII-lookalike characters found in Unicode, including:
// - 𝘈 (U+1D608 Mathematical Sans-Serif Italic Capital A)
// - 𝘡 (U+1D621 Mathematical Sans-Serif Italic Capital Z)
//
// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols
const regexes: RegExp[] = [
/[𝘈-𝘡][𝘡-𝘈]/,
/[𝘈-𝘡][𝘡-𝘈]/u,
/[𝘈-𝘡][𝘡-𝘈]/v,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/u,
/[\u{1D608}-\u{1D621}][\u{1D621}-\u{1D608}]/v,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/u,
/[\uD835\uDE08-\uD835\uDE21][\uD835\uDE21-\uD835\uDE08]/v,
];