mirror of
https://github.com/microsoft/TypeScript.git
synced 2026-06-13 11:46:08 -05:00
Simplify or optimize regexes with polynomial time worst cases (#44197)
* Simplify or optimize regexes with polynomial time worst cases * PR feedback & cleanup Co-authored-by: David Michon <dmichon-msft@users.noreply.github.com> * Use builtin scanner function for checking whitespace in fallback method (its faster) Co-authored-by: David Michon <dmichon-msft@users.noreply.github.com>
This commit is contained in:
@@ -3034,10 +3034,6 @@ namespace ts {
|
||||
return filter(map(values, v => convertJsonOption(option.element, v, basePath, errors)), v => !!v);
|
||||
}
|
||||
|
||||
function trimString(s: string) {
|
||||
return typeof s.trim === "function" ? s.trim() : s.replace(/^[\s]+|[\s]+$/g, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for a path that ends in a recursive directory wildcard.
|
||||
* Matches **, \**, **\, and \**\, but not a**b.
|
||||
@@ -3051,36 +3047,6 @@ namespace ts {
|
||||
*/
|
||||
const invalidTrailingRecursionPattern = /(^|\/)\*\*\/?$/;
|
||||
|
||||
/**
|
||||
* Tests for a path where .. appears after a recursive directory wildcard.
|
||||
* Matches **\..\*, **\a\..\*, and **\.., but not ..\**\*
|
||||
*
|
||||
* NOTE: used \ in place of / above to avoid issues with multiline comments.
|
||||
*
|
||||
* Breakdown:
|
||||
* (^|\/) # matches either the beginning of the string or a directory separator.
|
||||
* \*\*\/ # matches a recursive directory wildcard "**" followed by a directory separator.
|
||||
* (.*\/)? # optionally matches any number of characters followed by a directory separator.
|
||||
* \.\. # matches a parent directory path component ".."
|
||||
* ($|\/) # matches either the end of the string or a directory separator.
|
||||
*/
|
||||
const invalidDotDotAfterRecursiveWildcardPattern = /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/;
|
||||
|
||||
/**
|
||||
* Tests for a path containing a wildcard character in a directory component of the path.
|
||||
* Matches \*\, \?\, and \a*b\, but not \a\ or \a\*.
|
||||
*
|
||||
* NOTE: used \ in place of / above to avoid issues with multiline comments.
|
||||
*
|
||||
* Breakdown:
|
||||
* \/ # matches a directory separator.
|
||||
* [^/]*? # matches any number of characters excluding directory separators (non-greedy).
|
||||
* [*?] # matches either a wildcard character (* or ?)
|
||||
* [^/]* # matches any number of characters excluding directory separators (greedy).
|
||||
* \/ # matches a directory separator.
|
||||
*/
|
||||
const watchRecursivePattern = /\/[^/]*?[*?][^/]*\//;
|
||||
|
||||
/**
|
||||
* Matches the portion of a wildcard path that does not contain wildcards.
|
||||
* Matches \a of \a\*, or \a\b\c of \a\b\c\?\d.
|
||||
@@ -3217,6 +3183,20 @@ namespace ts {
|
||||
return matchesExcludeWorker(pathToCheck, validatedExcludeSpecs, useCaseSensitiveFileNames, currentDirectory, basePath);
|
||||
}
|
||||
|
||||
function invalidDotDotAfterRecursiveWildcard(s: string) {
|
||||
// We used to use the regex /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/ to check for this case, but
|
||||
// in v8, that has polynomial performance because the recursive wildcard match - **/ -
|
||||
// can be matched in many arbitrary positions when multiple are present, resulting
|
||||
// in bad backtracking (and we don't care which is matched - just that some /.. segment
|
||||
// comes after some **/ segment).
|
||||
const wildcardIndex = startsWith(s, "**/") ? 0 : s.indexOf("/**/");
|
||||
if (wildcardIndex === -1) {
|
||||
return false;
|
||||
}
|
||||
const lastDotIndex = endsWith(s, "/..") ? s.length : s.lastIndexOf("/../");
|
||||
return lastDotIndex > wildcardIndex;
|
||||
}
|
||||
|
||||
/* @internal */
|
||||
export function matchesExclude(
|
||||
pathToCheck: string,
|
||||
@@ -3226,7 +3206,7 @@ namespace ts {
|
||||
) {
|
||||
return matchesExcludeWorker(
|
||||
pathToCheck,
|
||||
filter(excludeSpecs, spec => !invalidDotDotAfterRecursiveWildcardPattern.test(spec)),
|
||||
filter(excludeSpecs, spec => !invalidDotDotAfterRecursiveWildcard(spec)),
|
||||
useCaseSensitiveFileNames,
|
||||
currentDirectory
|
||||
);
|
||||
@@ -3268,7 +3248,7 @@ namespace ts {
|
||||
if (disallowTrailingRecursion && invalidTrailingRecursionPattern.test(spec)) {
|
||||
return [Diagnostics.File_specification_cannot_end_in_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0, spec];
|
||||
}
|
||||
else if (invalidDotDotAfterRecursiveWildcardPattern.test(spec)) {
|
||||
else if (invalidDotDotAfterRecursiveWildcard(spec)) {
|
||||
return [Diagnostics.File_specification_cannot_contain_a_parent_directory_that_appears_after_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0, spec];
|
||||
}
|
||||
}
|
||||
@@ -3331,9 +3311,18 @@ namespace ts {
|
||||
function getWildcardDirectoryFromSpec(spec: string, useCaseSensitiveFileNames: boolean): { key: string, flags: WatchDirectoryFlags } | undefined {
|
||||
const match = wildcardDirectoryPattern.exec(spec);
|
||||
if (match) {
|
||||
// We check this with a few `indexOf` calls because 3 `indexOf`/`lastIndexOf` calls is
|
||||
// less algorithmically complex (roughly O(3n) worst-case) than the regex we used to use,
|
||||
// \/[^/]*?[*?][^/]*\/ which was polynominal in v8, since arbitrary sequences of wildcard
|
||||
// characters could match any of the central patterns, resulting in bad backtracking.
|
||||
const questionWildcardIndex = spec.indexOf("?");
|
||||
const starWildcardIndex = spec.indexOf("*");
|
||||
const lastDirectorySeperatorIndex = spec.lastIndexOf(directorySeparator);
|
||||
return {
|
||||
key: useCaseSensitiveFileNames ? match[0] : toFileNameLowerCase(match[0]),
|
||||
flags: watchRecursivePattern.test(spec) ? WatchDirectoryFlags.Recursive : WatchDirectoryFlags.None
|
||||
flags: (questionWildcardIndex !== -1 && questionWildcardIndex < lastDirectorySeperatorIndex)
|
||||
|| (starWildcardIndex !== -1 && starWildcardIndex < lastDirectorySeperatorIndex)
|
||||
? WatchDirectoryFlags.Recursive : WatchDirectoryFlags.None
|
||||
};
|
||||
}
|
||||
if (isImplicitGlob(spec)) {
|
||||
|
||||
Reference in New Issue
Block a user