Simplify or optimize regexes with polynomial time worst cases (#44197)

* Simplify or optimize regexes with polynomial time worst cases

* PR feedback & cleanup

Co-authored-by: David Michon <dmichon-msft@users.noreply.github.com>

* Use builtin scanner function for checking whitespace in fallback method (its faster)

Co-authored-by: David Michon <dmichon-msft@users.noreply.github.com>
This commit is contained in:
Wesley Wigham
2021-05-24 15:28:52 -07:00
committed by GitHub
parent 2203228b62
commit fcabb5c0cc
11 changed files with 158 additions and 85 deletions

View File

@@ -3034,10 +3034,6 @@ namespace ts {
return filter(map(values, v => convertJsonOption(option.element, v, basePath, errors)), v => !!v);
}
function trimString(s: string) {
return typeof s.trim === "function" ? s.trim() : s.replace(/^[\s]+|[\s]+$/g, "");
}
/**
* Tests for a path that ends in a recursive directory wildcard.
* Matches **, \**, **\, and \**\, but not a**b.
@@ -3051,36 +3047,6 @@ namespace ts {
*/
const invalidTrailingRecursionPattern = /(^|\/)\*\*\/?$/;
/**
* Tests for a path where .. appears after a recursive directory wildcard.
* Matches **\..\*, **\a\..\*, and **\.., but not ..\**\*
*
* NOTE: used \ in place of / above to avoid issues with multiline comments.
*
* Breakdown:
* (^|\/) # matches either the beginning of the string or a directory separator.
* \*\*\/ # matches a recursive directory wildcard "**" followed by a directory separator.
* (.*\/)? # optionally matches any number of characters followed by a directory separator.
* \.\. # matches a parent directory path component ".."
* ($|\/) # matches either the end of the string or a directory separator.
*/
const invalidDotDotAfterRecursiveWildcardPattern = /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/;
/**
* Tests for a path containing a wildcard character in a directory component of the path.
* Matches \*\, \?\, and \a*b\, but not \a\ or \a\*.
*
* NOTE: used \ in place of / above to avoid issues with multiline comments.
*
* Breakdown:
* \/ # matches a directory separator.
* [^/]*? # matches any number of characters excluding directory separators (non-greedy).
* [*?] # matches either a wildcard character (* or ?)
* [^/]* # matches any number of characters excluding directory separators (greedy).
* \/ # matches a directory separator.
*/
const watchRecursivePattern = /\/[^/]*?[*?][^/]*\//;
/**
* Matches the portion of a wildcard path that does not contain wildcards.
* Matches \a of \a\*, or \a\b\c of \a\b\c\?\d.
@@ -3217,6 +3183,20 @@ namespace ts {
return matchesExcludeWorker(pathToCheck, validatedExcludeSpecs, useCaseSensitiveFileNames, currentDirectory, basePath);
}
function invalidDotDotAfterRecursiveWildcard(s: string) {
// We used to use the regex /(^|\/)\*\*\/(.*\/)?\.\.($|\/)/ to check for this case, but
// in v8, that has polynomial performance because the recursive wildcard match - **/ -
// can be matched in many arbitrary positions when multiple are present, resulting
// in bad backtracking (and we don't care which is matched - just that some /.. segment
// comes after some **/ segment).
const wildcardIndex = startsWith(s, "**/") ? 0 : s.indexOf("/**/");
if (wildcardIndex === -1) {
return false;
}
const lastDotIndex = endsWith(s, "/..") ? s.length : s.lastIndexOf("/../");
return lastDotIndex > wildcardIndex;
}
/* @internal */
export function matchesExclude(
pathToCheck: string,
@@ -3226,7 +3206,7 @@ namespace ts {
) {
return matchesExcludeWorker(
pathToCheck,
filter(excludeSpecs, spec => !invalidDotDotAfterRecursiveWildcardPattern.test(spec)),
filter(excludeSpecs, spec => !invalidDotDotAfterRecursiveWildcard(spec)),
useCaseSensitiveFileNames,
currentDirectory
);
@@ -3268,7 +3248,7 @@ namespace ts {
if (disallowTrailingRecursion && invalidTrailingRecursionPattern.test(spec)) {
return [Diagnostics.File_specification_cannot_end_in_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0, spec];
}
else if (invalidDotDotAfterRecursiveWildcardPattern.test(spec)) {
else if (invalidDotDotAfterRecursiveWildcard(spec)) {
return [Diagnostics.File_specification_cannot_contain_a_parent_directory_that_appears_after_a_recursive_directory_wildcard_Asterisk_Asterisk_Colon_0, spec];
}
}
@@ -3331,9 +3311,18 @@ namespace ts {
function getWildcardDirectoryFromSpec(spec: string, useCaseSensitiveFileNames: boolean): { key: string, flags: WatchDirectoryFlags } | undefined {
const match = wildcardDirectoryPattern.exec(spec);
if (match) {
// We check this with a few `indexOf` calls because 3 `indexOf`/`lastIndexOf` calls is
// less algorithmically complex (roughly O(3n) worst-case) than the regex we used to use,
// \/[^/]*?[*?][^/]*\/ which was polynominal in v8, since arbitrary sequences of wildcard
// characters could match any of the central patterns, resulting in bad backtracking.
const questionWildcardIndex = spec.indexOf("?");
const starWildcardIndex = spec.indexOf("*");
const lastDirectorySeperatorIndex = spec.lastIndexOf(directorySeparator);
return {
key: useCaseSensitiveFileNames ? match[0] : toFileNameLowerCase(match[0]),
flags: watchRecursivePattern.test(spec) ? WatchDirectoryFlags.Recursive : WatchDirectoryFlags.None
flags: (questionWildcardIndex !== -1 && questionWildcardIndex < lastDirectorySeperatorIndex)
|| (starWildcardIndex !== -1 && starWildcardIndex < lastDirectorySeperatorIndex)
? WatchDirectoryFlags.Recursive : WatchDirectoryFlags.None
};
}
if (isImplicitGlob(spec)) {