From fcabb5c0cc5e67add5d1fa25b8e2da115ba95c17 Mon Sep 17 00:00:00 2001 From: Wesley Wigham Date: Mon, 24 May 2021 15:28:52 -0700 Subject: [PATCH] Simplify or optimize regexes with polynomial time worst cases (#44197) * Simplify or optimize regexes with polynomial time worst cases * PR feedback & cleanup Co-authored-by: David Michon * Use builtin scanner function for checking whitespace in fallback method (its faster) Co-authored-by: David Michon --- src/compiler/commandLineParser.ts | 63 +++++++++------------- src/compiler/core.ts | 48 +++++++++++++++-- src/compiler/debug.ts | 3 ++ src/compiler/parser.ts | 15 +++--- src/compiler/program.ts | 2 +- src/compiler/scanner.ts | 10 ++-- src/compiler/semver.ts | 9 ++-- src/compiler/sourcemap.ts | 4 +- src/compiler/utilities.ts | 51 ++++++++++++++---- src/services/classifier.ts | 29 +++++----- src/services/outliningElementsCollector.ts | 9 +++- 11 files changed, 158 insertions(+), 85 deletions(-) diff --git a/src/compiler/commandLineParser.ts b/src/compiler/commandLineParser.ts index ffd43b9d7d5..4fec5cd18ca 100644 --- a/src/compiler/commandLineParser.ts +++ b/src/compiler/commandLineParser.ts @@ -3034,10 +3034,6 @@ namespace ts { return filter(map(values, v => convertJsonOption(option.element, v, basePath, errors)), v => !!v); } - function trimString(s: string) { - return typeof s.trim === "function" ? s.trim() : s.replace(/^[\s]+|[\s]+$/g, ""); - } - /** * Tests for a path that ends in a recursive directory wildcard. * Matches **, \**, **\, and \**\, but not a**b. @@ -3051,36 +3047,6 @@ namespace ts { */ const invalidTrailingRecursionPattern = /(^|\/)\*\*\/?$/; - /** - * Tests for a path where .. appears after a recursive directory wildcard. - * Matches **\..\*, **\a\..\*, and **\.., but not ..\**\* - * - * NOTE: used \ in place of / above to avoid issues with multiline comments. - * - * Breakdown: - * (^|\/) # matches either the beginning of the string or a directory separator. 
- * \*\*\/ # matches a recursive directory wildcard "**" followed by a directory separator. - * (.*\/)? # optionally matches any number of characters followed by a directory separator. - * \.\. # matches a parent directory path component ".." - * ($|\/) # matches either the end of the string or a directory separator. - */ - const invalidDotDotAfterRecursiveWildcardPattern = /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/; - - /** - * Tests for a path containing a wildcard character in a directory component of the path. - * Matches \*\, \?\, and \a*b\, but not \a\ or \a\*. - * - * NOTE: used \ in place of / above to avoid issues with multiline comments. - * - * Breakdown: - * \/ # matches a directory separator. - * [^/]*? # matches any number of characters excluding directory separators (non-greedy). - * [*?] # matches either a wildcard character (* or ?) - * [^/]* # matches any number of characters excluding directory separators (greedy). - * \/ # matches a directory separator. - */ - const watchRecursivePattern = /\/[^/]*?[*?][^/]*\//; - /** * Matches the portion of a wildcard path that does not contain wildcards. * Matches \a of \a\*, or \a\b\c of \a\b\c\?\d. @@ -3217,6 +3183,20 @@ namespace ts { return matchesExcludeWorker(pathToCheck, validatedExcludeSpecs, useCaseSensitiveFileNames, currentDirectory, basePath); } + function invalidDotDotAfterRecursiveWildcard(s: string) { + // We used to use the regex /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/ to check for this case, but + // in v8, that has polynomial performance because the recursive wildcard match - **/ - + // can be matched in many arbitrary positions when multiple are present, resulting + // in bad backtracking (and we don't care which is matched - just that some /.. segment + // comes after some **/ segment). + const wildcardIndex = startsWith(s, "**/") ? 0 : s.indexOf("/**/"); + if (wildcardIndex === -1) { + return false; + } + const lastDotIndex = endsWith(s, "/..") ? 
s.length : s.lastIndexOf("/../"); + return lastDotIndex > wildcardIndex; + } + /* @internal */ export function matchesExclude( pathToCheck: string, @@ -3226,7 +3206,7 @@ namespace ts { ) { return matchesExcludeWorker( pathToCheck, - filter(excludeSpecs, spec => !invalidDotDotAfterRecursiveWildcardPattern.test(spec)), + filter(excludeSpecs, spec => !invalidDotDotAfterRecursiveWildcard(spec)), useCaseSensitiveFileNames, currentDirectory ); @@ -3268,7 +3248,7 @@ namespace ts { if (disallowTrailingRecursion && invalidTrailingRecursionPattern.test(spec)) { return [Diagnostics.File_specification_cannot_end_in_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0, spec]; } - else if (invalidDotDotAfterRecursiveWildcardPattern.test(spec)) { + else if (invalidDotDotAfterRecursiveWildcard(spec)) { return [Diagnostics.File_specification_cannot_contain_a_parent_directory_that_appears_after_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0, spec]; } } @@ -3331,9 +3311,18 @@ namespace ts { function getWildcardDirectoryFromSpec(spec: string, useCaseSensitiveFileNames: boolean): { key: string, flags: WatchDirectoryFlags } | undefined { const match = wildcardDirectoryPattern.exec(spec); if (match) { + // We check this with a few `indexOf` calls because 3 `indexOf`/`lastIndexOf` calls is + // less algorithmically complex (roughly O(3n) worst-case) than the regex we used to use, + // \/[^/]*?[*?][^/]*\/ which was polynominal in v8, since arbitrary sequences of wildcard + // characters could match any of the central patterns, resulting in bad backtracking. + const questionWildcardIndex = spec.indexOf("?"); + const starWildcardIndex = spec.indexOf("*"); + const lastDirectorySeperatorIndex = spec.lastIndexOf(directorySeparator); return { key: useCaseSensitiveFileNames ? match[0] : toFileNameLowerCase(match[0]), - flags: watchRecursivePattern.test(spec) ? 
WatchDirectoryFlags.Recursive : WatchDirectoryFlags.None + flags: (questionWildcardIndex !== -1 && questionWildcardIndex < lastDirectorySeperatorIndex) + || (starWildcardIndex !== -1 && starWildcardIndex < lastDirectorySeperatorIndex) + ? WatchDirectoryFlags.Recursive : WatchDirectoryFlags.None }; } if (isImplicitGlob(spec)) { diff --git a/src/compiler/core.ts b/src/compiler/core.ts index 0c95903cc27..cb514f59ab2 100644 --- a/src/compiler/core.ts +++ b/src/compiler/core.ts @@ -2035,11 +2035,51 @@ namespace ts { * Takes a string like "jquery-min.4.2.3" and returns "jquery" */ export function removeMinAndVersionNumbers(fileName: string) { - // Match a "." or "-" followed by a version number or 'min' at the end of the name - const trailingMinOrVersion = /[.-]((min)|(\d+(\.\d+)*))$/; + // We used to use the regex /[.-]((min)|(\d+(\.\d+)*))$/ and would just .replace it twice. + // Unfortunately, that regex has O(n^2) performance because v8 doesn't match from the end of the string. + // Instead, we now essentially scan the filename (backwards) ourselves. - // The "min" or version may both be present, in either order, so try applying the above twice. 
-        return fileName.replace(trailingMinOrVersion, "").replace(trailingMinOrVersion, "");
+        let end: number = fileName.length;
+
+        for (let pos = end - 1; pos > 0; pos--) {
+            let ch: number = fileName.charCodeAt(pos);
+            if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
+                // Match a \d+ segment
+                do {
+                    --pos;
+                    ch = fileName.charCodeAt(pos);
+                } while (pos > 0 && ch >= CharacterCodes._0 && ch <= CharacterCodes._9);
+            }
+            else if (pos > 2 && (ch === CharacterCodes.n || ch === CharacterCodes.N)) {
+                // Looking for "min" in any letter case ("min", "MIN", "Min", ...)
+                // Already matched the 'n'; pos > 2 guarantees "mi" and the separator fit before it (so "a.min" -> "a")
+                --pos;
+                ch = fileName.charCodeAt(pos);
+                if (ch !== CharacterCodes.i && ch !== CharacterCodes.I) {
+                    break;
+                }
+                --pos;
+                ch = fileName.charCodeAt(pos);
+                if (ch !== CharacterCodes.m && ch !== CharacterCodes.M) {
+                    break;
+                }
+                --pos;
+                ch = fileName.charCodeAt(pos);
+            }
+            else {
+                // This character is not part of either suffix pattern
+                break;
+            }
+
+            if (ch !== CharacterCodes.minus && ch !== CharacterCodes.dot) {
+                break;
+            }
+
+            end = pos;
+        }
+
+        // end might be fileName.length, in which case this should internally no-op
+        return end === fileName.length ? fileName : fileName.slice(0, end);
     }
 
     /** Remove an item from an array, moving everything to its right one space left. */
diff --git a/src/compiler/debug.ts b/src/compiler/debug.ts
index 97cb4f3687f..fc7b1aa4cdb 100644
--- a/src/compiler/debug.ts
+++ b/src/compiler/debug.ts
@@ -471,6 +471,9 @@ namespace ts {
                         // An `Array` with extra properties is rendered as `[A, B, prop1: 1, prop2: 2]`. Most of
                         // these aren't immediately useful so we trim off the `prop1: ..., prop2: ...` part from the
                         // formatted string.
+                        // This regex can trigger slow backtracking because of overlapping potential captures.
+                        // We don't care, this is debug code that's only enabled with a debugger attached -
+                        // we're just taking note of it for anyone checking regex performance in the future.
defaultValue = String(defaultValue).replace(/(?:,[\s\w\d_]+:[^,]+)+\]$/, "]"); return `NodeArray ${defaultValue}`; } diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts index cf983ee5067..ed68dc053c2 100644 --- a/src/compiler/parser.ts +++ b/src/compiler/parser.ts @@ -9094,7 +9094,7 @@ namespace ts { if (namedArgRegExCache.has(name)) { return namedArgRegExCache.get(name)!; } - const result = new RegExp(`(\\s${name}\\s*=\\s*)('|")(.+?)\\2`, "im"); + const result = new RegExp(`(\\s${name}\\s*=\\s*)(?:(?:'([^']*)')|(?:"([^"]*)"))`, "im"); namedArgRegExCache.set(name, result); return result; } @@ -9118,16 +9118,17 @@ namespace ts { return; // Missing required argument, don't parse } else if (matchResult) { + const value = matchResult[2] || matchResult[3]; if (arg.captureSpan) { - const startPos = range.pos + matchResult.index + matchResult[1].length + matchResult[2].length; + const startPos = range.pos + matchResult.index + matchResult[1].length + 1; argument[arg.name] = { - value: matchResult[3], + value, pos: startPos, - end: startPos + matchResult[3].length + end: startPos + value.length }; } else { - argument[arg.name] = matchResult[3]; + argument[arg.name] = value; } } } @@ -9145,7 +9146,7 @@ namespace ts { } if (range.kind === SyntaxKind.MultiLineCommentTrivia) { - const multiLinePragmaRegEx = /\s*@(\S+)\s*(.*)\s*$/gim; // Defined inline since it uses the "g" flag, which keeps a persistent index (for iterating) + const multiLinePragmaRegEx = /@(\S+)(\s+.*)?$/gim; // Defined inline since it uses the "g" flag, which keeps a persistent index (for iterating) let multiLineMatch: RegExpExecArray | null; while (multiLineMatch = multiLinePragmaRegEx.exec(text)) { addPragmaForMatch(pragmas, range, PragmaKindFlags.MultiLine, multiLineMatch); @@ -9170,7 +9171,7 @@ namespace ts { function getNamedPragmaArguments(pragma: PragmaDefinition, text: string | undefined): {[index: string]: string} | "fail" { if (!text) return {}; if (!pragma.args) return {}; - const args = 
text.split(/\s+/); + const args = trimString(text).split(/\s+/); const argMap: {[index: string]: string} = {}; for (let i = 0; i < pragma.args.length; i++) { const argument = pragma.args[i]; diff --git a/src/compiler/program.ts b/src/compiler/program.ts index caf3ebb1c7d..102517c4ce6 100644 --- a/src/compiler/program.ts +++ b/src/compiler/program.ts @@ -406,7 +406,7 @@ namespace ts { const lineStart = getPositionOfLineAndCharacter(file, i, 0); const lineEnd = i < lastLineInFile ? getPositionOfLineAndCharacter(file, i + 1, 0) : file.text.length; let lineContent = file.text.slice(lineStart, lineEnd); - lineContent = lineContent.replace(/\s+$/g, ""); // trim from end + lineContent = trimStringEnd(lineContent); // trim from end lineContent = lineContent.replace(/\t/g, " "); // convert tabs to single spaces // Output the gutter and the actual contents of the line. diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 6e5e54d7a52..62574e51bff 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -283,14 +283,14 @@ namespace ts { const unicodeESNextIdentifierPart = [48, 57, 65, 90, 95, 95, 97, 122, 170, 170, 181, 181, 183, 183, 186, 186, 192, 214, 216, 246, 248, 705, 710, 721, 736, 740, 748, 748, 750, 750, 768, 884, 886, 887, 890, 893, 895, 895, 902, 906, 908, 908, 910, 929, 931, 1013, 1015, 1153, 1155, 1159, 1162, 1327, 1329, 1366, 1369, 1369, 1376, 1416, 1425, 1469, 1471, 1471, 1473, 1474, 1476, 1477, 1479, 1479, 1488, 1514, 1519, 1522, 1552, 1562, 1568, 1641, 1646, 1747, 1749, 1756, 1759, 1768, 1770, 1788, 1791, 1791, 1808, 1866, 1869, 1969, 1984, 2037, 2042, 2042, 2045, 2045, 2048, 2093, 2112, 2139, 2144, 2154, 2208, 2228, 2230, 2237, 2259, 2273, 2275, 2403, 2406, 2415, 2417, 2435, 2437, 2444, 2447, 2448, 2451, 2472, 2474, 2480, 2482, 2482, 2486, 2489, 2492, 2500, 2503, 2504, 2507, 2510, 2519, 2519, 2524, 2525, 2527, 2531, 2534, 2545, 2556, 2556, 2558, 2558, 2561, 2563, 2565, 2570, 2575, 2576, 2579, 2600, 2602, 2608, 2610, 2611, 2613, 
2614, 2616, 2617, 2620, 2620, 2622, 2626, 2631, 2632, 2635, 2637, 2641, 2641, 2649, 2652, 2654, 2654, 2662, 2677, 2689, 2691, 2693, 2701, 2703, 2705, 2707, 2728, 2730, 2736, 2738, 2739, 2741, 2745, 2748, 2757, 2759, 2761, 2763, 2765, 2768, 2768, 2784, 2787, 2790, 2799, 2809, 2815, 2817, 2819, 2821, 2828, 2831, 2832, 2835, 2856, 2858, 2864, 2866, 2867, 2869, 2873, 2876, 2884, 2887, 2888, 2891, 2893, 2902, 2903, 2908, 2909, 2911, 2915, 2918, 2927, 2929, 2929, 2946, 2947, 2949, 2954, 2958, 2960, 2962, 2965, 2969, 2970, 2972, 2972, 2974, 2975, 2979, 2980, 2984, 2986, 2990, 3001, 3006, 3010, 3014, 3016, 3018, 3021, 3024, 3024, 3031, 3031, 3046, 3055, 3072, 3084, 3086, 3088, 3090, 3112, 3114, 3129, 3133, 3140, 3142, 3144, 3146, 3149, 3157, 3158, 3160, 3162, 3168, 3171, 3174, 3183, 3200, 3203, 3205, 3212, 3214, 3216, 3218, 3240, 3242, 3251, 3253, 3257, 3260, 3268, 3270, 3272, 3274, 3277, 3285, 3286, 3294, 3294, 3296, 3299, 3302, 3311, 3313, 3314, 3328, 3331, 3333, 3340, 3342, 3344, 3346, 3396, 3398, 3400, 3402, 3406, 3412, 3415, 3423, 3427, 3430, 3439, 3450, 3455, 3458, 3459, 3461, 3478, 3482, 3505, 3507, 3515, 3517, 3517, 3520, 3526, 3530, 3530, 3535, 3540, 3542, 3542, 3544, 3551, 3558, 3567, 3570, 3571, 3585, 3642, 3648, 3662, 3664, 3673, 3713, 3714, 3716, 3716, 3718, 3722, 3724, 3747, 3749, 3749, 3751, 3773, 3776, 3780, 3782, 3782, 3784, 3789, 3792, 3801, 3804, 3807, 3840, 3840, 3864, 3865, 3872, 3881, 3893, 3893, 3895, 3895, 3897, 3897, 3902, 3911, 3913, 3948, 3953, 3972, 3974, 3991, 3993, 4028, 4038, 4038, 4096, 4169, 4176, 4253, 4256, 4293, 4295, 4295, 4301, 4301, 4304, 4346, 4348, 4680, 4682, 4685, 4688, 4694, 4696, 4696, 4698, 4701, 4704, 4744, 4746, 4749, 4752, 4784, 4786, 4789, 4792, 4798, 4800, 4800, 4802, 4805, 4808, 4822, 4824, 4880, 4882, 4885, 4888, 4954, 4957, 4959, 4969, 4977, 4992, 5007, 5024, 5109, 5112, 5117, 5121, 5740, 5743, 5759, 5761, 5786, 5792, 5866, 5870, 5880, 5888, 5900, 5902, 5908, 5920, 5940, 5952, 5971, 5984, 5996, 5998, 6000, 6002, 6003, 
6016, 6099, 6103, 6103, 6108, 6109, 6112, 6121, 6155, 6157, 6160, 6169, 6176, 6264, 6272, 6314, 6320, 6389, 6400, 6430, 6432, 6443, 6448, 6459, 6470, 6509, 6512, 6516, 6528, 6571, 6576, 6601, 6608, 6618, 6656, 6683, 6688, 6750, 6752, 6780, 6783, 6793, 6800, 6809, 6823, 6823, 6832, 6845, 6912, 6987, 6992, 7001, 7019, 7027, 7040, 7155, 7168, 7223, 7232, 7241, 7245, 7293, 7296, 7304, 7312, 7354, 7357, 7359, 7376, 7378, 7380, 7418, 7424, 7673, 7675, 7957, 7960, 7965, 7968, 8005, 8008, 8013, 8016, 8023, 8025, 8025, 8027, 8027, 8029, 8029, 8031, 8061, 8064, 8116, 8118, 8124, 8126, 8126, 8130, 8132, 8134, 8140, 8144, 8147, 8150, 8155, 8160, 8172, 8178, 8180, 8182, 8188, 8255, 8256, 8276, 8276, 8305, 8305, 8319, 8319, 8336, 8348, 8400, 8412, 8417, 8417, 8421, 8432, 8450, 8450, 8455, 8455, 8458, 8467, 8469, 8469, 8472, 8477, 8484, 8484, 8486, 8486, 8488, 8488, 8490, 8505, 8508, 8511, 8517, 8521, 8526, 8526, 8544, 8584, 11264, 11310, 11312, 11358, 11360, 11492, 11499, 11507, 11520, 11557, 11559, 11559, 11565, 11565, 11568, 11623, 11631, 11631, 11647, 11670, 11680, 11686, 11688, 11694, 11696, 11702, 11704, 11710, 11712, 11718, 11720, 11726, 11728, 11734, 11736, 11742, 11744, 11775, 12293, 12295, 12321, 12335, 12337, 12341, 12344, 12348, 12353, 12438, 12441, 12447, 12449, 12538, 12540, 12543, 12549, 12591, 12593, 12686, 12704, 12730, 12784, 12799, 13312, 19893, 19968, 40943, 40960, 42124, 42192, 42237, 42240, 42508, 42512, 42539, 42560, 42607, 42612, 42621, 42623, 42737, 42775, 42783, 42786, 42888, 42891, 42943, 42946, 42950, 42999, 43047, 43072, 43123, 43136, 43205, 43216, 43225, 43232, 43255, 43259, 43259, 43261, 43309, 43312, 43347, 43360, 43388, 43392, 43456, 43471, 43481, 43488, 43518, 43520, 43574, 43584, 43597, 43600, 43609, 43616, 43638, 43642, 43714, 43739, 43741, 43744, 43759, 43762, 43766, 43777, 43782, 43785, 43790, 43793, 43798, 43808, 43814, 43816, 43822, 43824, 43866, 43868, 43879, 43888, 44010, 44012, 44013, 44016, 44025, 44032, 55203, 55216, 55238, 55243, 
55291, 63744, 64109, 64112, 64217, 64256, 64262, 64275, 64279, 64285, 64296, 64298, 64310, 64312, 64316, 64318, 64318, 64320, 64321, 64323, 64324, 64326, 64433, 64467, 64829, 64848, 64911, 64914, 64967, 65008, 65019, 65024, 65039, 65056, 65071, 65075, 65076, 65101, 65103, 65136, 65140, 65142, 65276, 65296, 65305, 65313, 65338, 65343, 65343, 65345, 65370, 65382, 65470, 65474, 65479, 65482, 65487, 65490, 65495, 65498, 65500, 65536, 65547, 65549, 65574, 65576, 65594, 65596, 65597, 65599, 65613, 65616, 65629, 65664, 65786, 65856, 65908, 66045, 66045, 66176, 66204, 66208, 66256, 66272, 66272, 66304, 66335, 66349, 66378, 66384, 66426, 66432, 66461, 66464, 66499, 66504, 66511, 66513, 66517, 66560, 66717, 66720, 66729, 66736, 66771, 66776, 66811, 66816, 66855, 66864, 66915, 67072, 67382, 67392, 67413, 67424, 67431, 67584, 67589, 67592, 67592, 67594, 67637, 67639, 67640, 67644, 67644, 67647, 67669, 67680, 67702, 67712, 67742, 67808, 67826, 67828, 67829, 67840, 67861, 67872, 67897, 67968, 68023, 68030, 68031, 68096, 68099, 68101, 68102, 68108, 68115, 68117, 68119, 68121, 68149, 68152, 68154, 68159, 68159, 68192, 68220, 68224, 68252, 68288, 68295, 68297, 68326, 68352, 68405, 68416, 68437, 68448, 68466, 68480, 68497, 68608, 68680, 68736, 68786, 68800, 68850, 68864, 68903, 68912, 68921, 69376, 69404, 69415, 69415, 69424, 69456, 69600, 69622, 69632, 69702, 69734, 69743, 69759, 69818, 69840, 69864, 69872, 69881, 69888, 69940, 69942, 69951, 69956, 69958, 69968, 70003, 70006, 70006, 70016, 70084, 70089, 70092, 70096, 70106, 70108, 70108, 70144, 70161, 70163, 70199, 70206, 70206, 70272, 70278, 70280, 70280, 70282, 70285, 70287, 70301, 70303, 70312, 70320, 70378, 70384, 70393, 70400, 70403, 70405, 70412, 70415, 70416, 70419, 70440, 70442, 70448, 70450, 70451, 70453, 70457, 70459, 70468, 70471, 70472, 70475, 70477, 70480, 70480, 70487, 70487, 70493, 70499, 70502, 70508, 70512, 70516, 70656, 70730, 70736, 70745, 70750, 70751, 70784, 70853, 70855, 70855, 70864, 70873, 71040, 71093, 
71096, 71104, 71128, 71133, 71168, 71232, 71236, 71236, 71248, 71257, 71296, 71352, 71360, 71369, 71424, 71450, 71453, 71467, 71472, 71481, 71680, 71738, 71840, 71913, 71935, 71935, 72096, 72103, 72106, 72151, 72154, 72161, 72163, 72164, 72192, 72254, 72263, 72263, 72272, 72345, 72349, 72349, 72384, 72440, 72704, 72712, 72714, 72758, 72760, 72768, 72784, 72793, 72818, 72847, 72850, 72871, 72873, 72886, 72960, 72966, 72968, 72969, 72971, 73014, 73018, 73018, 73020, 73021, 73023, 73031, 73040, 73049, 73056, 73061, 73063, 73064, 73066, 73102, 73104, 73105, 73107, 73112, 73120, 73129, 73440, 73462, 73728, 74649, 74752, 74862, 74880, 75075, 77824, 78894, 82944, 83526, 92160, 92728, 92736, 92766, 92768, 92777, 92880, 92909, 92912, 92916, 92928, 92982, 92992, 92995, 93008, 93017, 93027, 93047, 93053, 93071, 93760, 93823, 93952, 94026, 94031, 94087, 94095, 94111, 94176, 94177, 94179, 94179, 94208, 100343, 100352, 101106, 110592, 110878, 110928, 110930, 110948, 110951, 110960, 111355, 113664, 113770, 113776, 113788, 113792, 113800, 113808, 113817, 113821, 113822, 119141, 119145, 119149, 119154, 119163, 119170, 119173, 119179, 119210, 119213, 119362, 119364, 119808, 119892, 119894, 119964, 119966, 119967, 119970, 119970, 119973, 119974, 119977, 119980, 119982, 119993, 119995, 119995, 119997, 120003, 120005, 120069, 120071, 120074, 120077, 120084, 120086, 120092, 120094, 120121, 120123, 120126, 120128, 120132, 120134, 120134, 120138, 120144, 120146, 120485, 120488, 120512, 120514, 120538, 120540, 120570, 120572, 120596, 120598, 120628, 120630, 120654, 120656, 120686, 120688, 120712, 120714, 120744, 120746, 120770, 120772, 120779, 120782, 120831, 121344, 121398, 121403, 121452, 121461, 121461, 121476, 121476, 121499, 121503, 121505, 121519, 122880, 122886, 122888, 122904, 122907, 122913, 122915, 122916, 122918, 122922, 123136, 123180, 123184, 123197, 123200, 123209, 123214, 123214, 123584, 123641, 124928, 125124, 125136, 125142, 125184, 125259, 125264, 125273, 126464, 126467, 
126469, 126495, 126497, 126498, 126500, 126500, 126503, 126503, 126505, 126514, 126516, 126519, 126521, 126521, 126523, 126523, 126530, 126530, 126535, 126535, 126537, 126537, 126539, 126539, 126541, 126543, 126545, 126546, 126548, 126548, 126551, 126551, 126553, 126553, 126555, 126555, 126557, 126557, 126559, 126559, 126561, 126562, 126564, 126564, 126567, 126570, 126572, 126578, 126580, 126583, 126585, 126588, 126590, 126590, 126592, 126601, 126603, 126619, 126625, 126627, 126629, 126633, 126635, 126651, 131072, 173782, 173824, 177972, 177984, 178205, 178208, 183969, 183984, 191456, 194560, 195101, 917760, 917999]; /** - * Test for whether a single line comment's text contains a directive. + * Test for whether a single line comment with leading whitespace trimmed's text contains a directive. */ - const commentDirectiveRegExSingleLine = /^\s*\/\/\/?\s*@(ts-expect-error|ts-ignore)/; + const commentDirectiveRegExSingleLine = /^\/\/\/?\s*@(ts-expect-error|ts-ignore)/; /** - * Test for whether a multi-line comment's last line contains a directive. + * Test for whether a multi-line comment with leading whitespace trimmed's last line contains a directive. */ - const commentDirectiveRegExMultiLine = /^\s*(?:\/|\*)*\s*@(ts-expect-error|ts-ignore)/; + const commentDirectiveRegExMultiLine = /^(?:\/|\*)*\s*@(ts-expect-error|ts-ignore)/; function lookupInUnicodeMap(code: number, map: readonly number[]): boolean { // Bail out quickly if it couldn't possibly be in the map. 
@@ -2185,7 +2185,7 @@ namespace ts {
             commentDirectiveRegEx: RegExp,
             lineStart: number,
         ) {
-            const type = getDirectiveFromComment(text, commentDirectiveRegEx);
+            const type = getDirectiveFromComment(trimStringStart(text), commentDirectiveRegEx);
             if (type === undefined) {
                 return commentDirectives;
             }
diff --git a/src/compiler/semver.ts b/src/compiler/semver.ts
index 8827962deaf..76d4cf2d65e 100644
--- a/src/compiler/semver.ts
+++ b/src/compiler/semver.ts
@@ -204,7 +204,7 @@ namespace ts {
     // range-set ::= range ( logical-or range ) *
     // range ::= hyphen | simple ( ' ' simple ) * | ''
     // logical-or ::= ( ' ' ) * '||' ( ' ' ) *
-    const logicalOrRegExp = /\s*\|\|\s*/g;
+    const logicalOrRegExp = /\|\|/g;
     const whitespaceRegExp = /\s+/g;
 
     // https://github.com/npm/node-semver#range-grammar
@@ -230,20 +230,21 @@ namespace ts {
     // primitive ::= ( '<' | '>' | '>=' | '<=' | '=' ) partial
     // tilde ::= '~' partial
     // caret ::= '^' partial
-    const rangeRegExp = /^\s*(~|\^|<|<=|>|>=|=)?\s*([a-z0-9-+.*]+)$/i;
+    const rangeRegExp = /^(~|\^|<|<=|>|>=|=)?\s*([a-z0-9-+.*]+)$/i;
 
     function parseRange(text: string) {
         const alternatives: Comparator[][] = [];
-        for (const range of text.trim().split(logicalOrRegExp)) {
+        for (let range of trimString(text).split(logicalOrRegExp)) {
+            range = trimString(range); // trim BEFORE the emptiness check: a whitespace-only alternative ("a || || b") must still be skipped, as it was when the split regex consumed the spaces
             if (!range) continue;
             const comparators: Comparator[] = [];
             const match = hyphenRegExp.exec(range);
             if (match) {
                 if (!parseHyphen(match[1], match[2], comparators)) return undefined;
             }
             else {
                 for (const simple of range.split(whitespaceRegExp)) {
-                    const match = rangeRegExp.exec(simple);
+                    const match = rangeRegExp.exec(trimString(simple));
                     if (!match || !parseComparator(match[1], match[2], comparators)) return undefined;
                 }
             }
diff --git a/src/compiler/sourcemap.ts b/src/compiler/sourcemap.ts
index 6ca09906db4..ed75cf081d9 100644
--- a/src/compiler/sourcemap.ts
+++ b/src/compiler/sourcemap.ts
@@ -322,7 +322,7 @@ namespace ts {
     }
 
     // Sometimes tools can see the following line as a source
mapping url comment, so we mangle it a bit (the [M])
-    const sourceMapCommentRegExp = /^\/\/[@#] source[M]appingURL=(.+)\s*$/;
+    const sourceMapCommentRegExp = /^\/\/[@#] source[M]appingURL=(.+)\r?\n?$/; // `.` never matches CR/LF and `$` has no `m` flag, so tolerate a trailing line break explicitly (assumes line text may still carry its terminator - the old `\s*` absorbed it)
     const whitespaceOrMapCommentRegExp = /^\s*(\/\/[@#] .*)?$/;
 
     export interface LineInfo {
@@ -345,7 +345,7 @@ namespace ts {
             const line = lineInfo.getLineText(index);
             const comment = sourceMapCommentRegExp.exec(line);
             if (comment) {
-                return comment[1];
+                return trimStringEnd(comment[1]);
             }
             // If we see a non-whitespace/map comment-like line, break, to avoid scanning up the entire file
             else if (!line.match(whitespaceOrMapCommentRegExp)) {
diff --git a/src/compiler/utilities.ts b/src/compiler/utilities.ts
index 0e1fde7d4cd..8271e6adfc2 100644
--- a/src/compiler/utilities.ts
+++ b/src/compiler/utilities.ts
@@ -414,10 +414,10 @@ namespace ts {
             commentPos + 2 < commentEnd &&
             text.charCodeAt(commentPos + 2) === CharacterCodes.slash) {
             const textSubStr = text.substring(commentPos, commentEnd);
-            return textSubStr.match(fullTripleSlashReferencePathRegEx) ||
-                textSubStr.match(fullTripleSlashAMDReferencePathRegEx) ||
-                textSubStr.match(fullTripleSlashReferenceTypeReferenceDirectiveRegEx) ||
-                textSubStr.match(defaultLibReferenceRegEx) ?
+            return fullTripleSlashReferencePathRegEx.test(textSubStr) ||
+                fullTripleSlashAMDReferencePathRegEx.test(textSubStr) ||
+                fullTripleSlashReferenceTypeReferenceDirectiveRegEx.test(textSubStr) ||
+                defaultLibReferenceRegEx.test(textSubStr) ?
                 true : false;
         }
         return false;
@@ -517,12 +517,43 @@ namespace ts {
 
         if (isJSDocTypeExpressionOrChild(node)) {
             // strip space + asterisk at line start
-            text = text.replace(/(^|\r?\n|\r)\s*\*\s*/g, "$1");
+            text = text.split(/\r\n|\n|\r/).map(line => trimStringStart(line.replace(/^\s*\*/, ""))).join("\n");
         }
 
         return text;
     }
 
+    /**
+     * Removes the leading and trailing white space and line terminator characters from a string.
+     */
+    export const trimString = !!String.prototype.trim ?
((s: string) => s.trim()) : (s: string) => trimStringEnd(trimStringStart(s)); + + /** + * Returns a copy with trailing whitespace removed. + */ + export const trimStringEnd = !!String.prototype.trimEnd ? ((s: string) => s.trimEnd()) : trimEndImpl; + + + /** + * Returns a copy with leading whitespace removed. + */ + export const trimStringStart = !!String.prototype.trimStart ? ((s: string) => s.trimStart()) : (s: string) => s.replace(/^\s+/g, ""); + + /** + * https://jsbench.me/gjkoxld4au/1 + * The simple regex for this, /\s+$/g is O(n^2) in v8. + * The native .trimEnd method is by far best, but since that's technically ES2019, + * we provide a (still much faster than the simple regex) fallback. + */ + function trimEndImpl(s: string) { + let end = s.length - 1; + while (end >= 0) { + if (!isWhiteSpaceLike(s.charCodeAt(end))) break; + end--; + } + return s.slice(0, end + 1); + } + export function getTextOfNode(node: Node, includeTrivia = false): string { return getSourceTextOfNodeFromSourceFile(getSourceFileOfNode(node), node, includeTrivia); } @@ -1226,10 +1257,10 @@ namespace ts { text.charCodeAt(comment.pos + 3) !== CharacterCodes.slash); } - export const fullTripleSlashReferencePathRegEx = /^(\/\/\/\s*/; - const fullTripleSlashReferenceTypeReferenceDirectiveRegEx = /^(\/\/\/\s*/; - export const fullTripleSlashAMDReferencePathRegEx = /^(\/\/\/\s*/; - const defaultLibReferenceRegEx = /^(\/\/\/\s*/; + export const fullTripleSlashReferencePathRegEx = /^(\/\/\/\s*/; + const fullTripleSlashReferenceTypeReferenceDirectiveRegEx = /^(\/\/\/\s*/; + export const fullTripleSlashAMDReferencePathRegEx = /^(\/\/\/\s*/; + const defaultLibReferenceRegEx = /^(\/\/\/\s*/; export function isPartOfTypeNode(node: Node): boolean { if (SyntaxKind.FirstTypeNode <= node.kind && node.kind <= SyntaxKind.LastTypeNode) { @@ -4630,7 +4661,7 @@ namespace ts { function writeTrimmedCurrentLine(text: string, commentEnd: number, writer: EmitTextWriter, newLine: string, pos: number, nextLineStart: 
number) { const end = Math.min(commentEnd, nextLineStart - 1); - const currentLineText = text.substring(pos, end).replace(/^\s+|\s+$/g, ""); + const currentLineText = trimString(text.substring(pos, end)); if (currentLineText) { // trimmed forward and ending spaces text writer.writeComment(currentLineText); diff --git a/src/services/classifier.ts b/src/services/classifier.ts index 45d9f499004..764f22ab92f 100644 --- a/src/services/classifier.ts +++ b/src/services/classifier.ts @@ -806,7 +806,8 @@ namespace ts { function tryClassifyTripleSlashComment(start: number, width: number): boolean { const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im; - const attributeRegex = /(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img; + // Require a leading whitespace character (the parser already does) to prevent terrible backtracking performance + const attributeRegex = /(\s)(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img; const text = sourceFile.text.substr(start, width); const match = tripleSlashXMLCommentRegEx.exec(text); @@ -842,30 +843,30 @@ namespace ts { break; } - const newAttrPos = pos + attrMatch.index; + const newAttrPos = pos + attrMatch.index + attrMatch[1].length; // whitespace if (newAttrPos > attrPos) { pushCommentRange(attrPos, newAttrPos - attrPos); attrPos = newAttrPos; } - pushClassification(attrPos, attrMatch[1].length, ClassificationType.jsxAttribute); // attribute name - attrPos += attrMatch[1].length; + pushClassification(attrPos, attrMatch[2].length, ClassificationType.jsxAttribute); // attribute name + attrPos += attrMatch[2].length; - if (attrMatch[2].length) { - pushCommentRange(attrPos, attrMatch[2].length); // whitespace - attrPos += attrMatch[2].length; + if (attrMatch[3].length) { + pushCommentRange(attrPos, attrMatch[3].length); // whitespace + attrPos += attrMatch[3].length; } - pushClassification(attrPos, attrMatch[3].length, ClassificationType.operator); // = - attrPos += attrMatch[3].length; + pushClassification(attrPos, 
attrMatch[4].length, ClassificationType.operator); // = + attrPos += attrMatch[4].length; - if (attrMatch[4].length) { - pushCommentRange(attrPos, attrMatch[4].length); // whitespace - attrPos += attrMatch[4].length; + if (attrMatch[5].length) { + pushCommentRange(attrPos, attrMatch[5].length); // whitespace + attrPos += attrMatch[5].length; } - pushClassification(attrPos, attrMatch[5].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value - attrPos += attrMatch[5].length; + pushClassification(attrPos, attrMatch[6].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value + attrPos += attrMatch[6].length; } pos += match[4].length; diff --git a/src/services/outliningElementsCollector.ts b/src/services/outliningElementsCollector.ts index 9f1e41c0ea1..53ad23c69d0 100644 --- a/src/services/outliningElementsCollector.ts +++ b/src/services/outliningElementsCollector.ts @@ -94,8 +94,15 @@ namespace ts.OutliningElementsCollector { } } - const regionDelimiterRegExp = /^\s*\/\/\s*#(end)?region(?:\s+(.*))?(?:\r)?$/; + const regionDelimiterRegExp = /^#(end)?region(?:\s+(.*))?(?:\r)?$/; function isRegionDelimiter(lineText: string) { + // We trim the leading whitespace and // without the regex since the + // multiple potential whitespace matches can make for some gnarly backtracking behavior + lineText = trimStringStart(lineText); + if (!startsWith(lineText, "\/\/")) { + return null; // eslint-disable-line no-null/no-null + } + lineText = trimString(lineText.slice(2)); return regionDelimiterRegExp.exec(lineText); }