From 7dd30d32fecdbd0b38504de808639ae72b804251 Mon Sep 17 00:00:00 2001 From: Cyrus Najmabadi Date: Mon, 17 Nov 2014 17:52:50 -0800 Subject: [PATCH] Simplify how regexs are incrementally parsed. Conflicts: tests/Fidelity/incremental/IncrementalParserTests.ts tests/Fidelity/parser/ecmascript5/MissingTokens/MissingToken2.ts.expected tests/Fidelity/parser/ecmascript5/RegressionTests/645086_1.ts.expected tests/Fidelity/parser/ecmascript5/RegressionTests/645086_2.ts.expected tests/Fidelity/parser/ecmascript5/RegularExpressions/RegularExpressionDivideAmbiguity4.ts.expected tests/Fidelity/parser/ecmascript5/SyntaxWalker.generated.ts.expected tests/Fidelity/program.js tests/Fidelity/program.js.map tests/Fidelity/scanner/ecmascript5/StringLiterals.ts.expected tests/Fidelity/test262/suite/ch07/7.3/S7.3_A2.1_T2.js.expected tests/Fidelity/test262/suite/ch07/7.3/S7.3_A2.2_T2.js.expected tests/Fidelity/test262/suite/ch07/7.4/S7.4_A3.js.expected tests/Fidelity/test262/suite/ch07/7.4/S7.4_A4_T1.js.expected tests/Fidelity/test262/suite/ch07/7.4/S7.4_A4_T4.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A1.1_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A1.1_T2.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A1.2_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A1.2_T2.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A3.1_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A3.1_T2.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A3.2_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.4/S7.8.4_A3.2_T2.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A1.2_T2.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A1.3_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A1.3_T3.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A1.5_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A1.5_T3.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A2.2_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A2.3_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A2.3_T3.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A2.5_T1.js.expected tests/Fidelity/test262/suite/ch07/7.8/7.8.5/S7.8.5_A2.5_T3.js.expected tests/Fidelity/test262/suite/ch08/8.4/S8.4_A13_T1.js.expected tests/Fidelity/test262/suite/ch08/8.4/S8.4_A13_T2.js.expected tests/Fidelity/test262/suite/ch08/8.4/S8.4_A14_T1.js.expected tests/Fidelity/test262/suite/ch08/8.4/S8.4_A14_T2.js.expected --- .../resources/diagnosticCode.generated.ts | 4 +- .../diagnosticInformationMap.generated.ts | 4 +- .../resources/diagnosticMessages.json | 10 ++++- src/services/syntax/parser.ts | 41 +++++-------------- src/services/syntax/scanner.ts | 22 +++++----- 5 files changed, 36 insertions(+), 45 deletions(-) diff --git a/src/services/resources/diagnosticCode.generated.ts b/src/services/resources/diagnosticCode.generated.ts index 1b140105b16..e5878978fcc 100644 --- a/src/services/resources/diagnosticCode.generated.ts +++ b/src/services/resources/diagnosticCode.generated.ts @@ -5,7 +5,7 @@ module TypeScript { warning_TS_0_1: "warning TS{0}: {1}", Unrecognized_escape_sequence: "Unrecognized escape sequence.", Unexpected_character_0: "Unexpected character {0}.", - Missing_close_quote_character: "Missing close quote character.", + Unterminated_string_literal: "Unterminated string literal.", Identifier_expected: "Identifier expected.", _0_keyword_expected: "'{0}' keyword expected.", _0_expected: "'{0}' expected.", @@ -97,6 +97,8 @@ module TypeScript { Template_literal_cannot_be_used_as_an_element_name: "Template literal cannot be used as an element name.", Computed_property_names_cannot_be_used_here: "Computed property names cannot be used here.", yield_expression_must_be_contained_within_a_generator_declaration: "'yield' expression must be contained within a generator declaration.", + Unterminated_regular_expression_literal: "Unterminated regular expression literal.", + Unterminated_template_literal: "Unterminated template literal.", Duplicate_identifier_0: "Duplicate identifier '{0}'.", The_name_0_does_not_exist_in_the_current_scope: "The name '{0}' does not exist in the current scope.", The_name_0_does_not_refer_to_a_value: "The name '{0}' does not refer to a value.", diff --git a/src/services/resources/diagnosticInformationMap.generated.ts b/src/services/resources/diagnosticInformationMap.generated.ts index 8e911f469ef..d6597a008df 100644 --- a/src/services/resources/diagnosticInformationMap.generated.ts +++ b/src/services/resources/diagnosticInformationMap.generated.ts @@ -6,7 +6,7 @@ module TypeScript { "warning TS{0}: {1}": { "code": 1, "category": DiagnosticCategory.NoPrefix }, "Unrecognized escape sequence.": { "code": 1000, "category": DiagnosticCategory.Error }, "Unexpected character {0}.": { "code": 1001, "category": DiagnosticCategory.Error }, - "Missing close quote character.": { "code": 1002, "category": DiagnosticCategory.Error }, + "Unterminated string literal.": { "code": 1002, "category": DiagnosticCategory.Error }, "Identifier expected.": { "code": 1003, "category": DiagnosticCategory.Error }, "'{0}' keyword expected.": { "code": 1004, "category": DiagnosticCategory.Error }, "'{0}' expected.": { "code": 1005, "category": DiagnosticCategory.Error }, @@ -99,6 +99,8 @@ module TypeScript { "Template literal cannot be used as an element name.": { "code": 1111, "category": DiagnosticCategory.Error }, "Computed property names cannot be used here.": { "code": 1112, "category": DiagnosticCategory.Error }, "'yield' expression must be contained within a generator declaration.": { "code": 1113, "category": DiagnosticCategory.Error }, + "Unterminated regular expression literal.": { "code": 1114, "category": DiagnosticCategory.Error }, + "Unterminated template literal.": { "code": 1115, "category": DiagnosticCategory.Error }, "Duplicate identifier '{0}'.": { "code": 2000, "category": DiagnosticCategory.Error }, "The name '{0}' does not exist in the current scope.": { "code": 2001, "category": DiagnosticCategory.Error }, "The name '{0}' does not refer to a value.": { "code": 2002, "category": DiagnosticCategory.Error }, diff --git a/src/services/resources/diagnosticMessages.json b/src/services/resources/diagnosticMessages.json index c238568eac6..724edbd8f3b 100644 --- a/src/services/resources/diagnosticMessages.json +++ b/src/services/resources/diagnosticMessages.json @@ -15,7 +15,7 @@ "category": "Error", "code": 1001 }, - "Missing close quote character.": { + "Unterminated string literal.": { "category": "Error", "code": 1002 }, @@ -383,6 +383,14 @@ "category": "Error", "code": 1113 }, + "Unterminated regular expression literal.": { + "category": "Error", + "code": 1114 + }, + "Unterminated template literal.": { + "category": "Error", + "code": 1115 + }, "Duplicate identifier '{0}'.": { "category": "Error", "code": 2000 diff --git a/src/services/syntax/parser.ts b/src/services/syntax/parser.ts index 8f8bae4e549..80c813ca162 100644 --- a/src/services/syntax/parser.ts +++ b/src/services/syntax/parser.ts @@ -1373,7 +1373,8 @@ module TypeScript.Parser { function parseFunctionDeclarationWorker(modifiers: ISyntaxToken[], functionKeyword: ISyntaxToken, asteriskToken: ISyntaxToken): FunctionDeclarationSyntax { // GeneratorDeclaration[Yield, Default] : - // function * BindingIdentifier[?Yield](FormalParameters[Yield, GeneratorParameter]) { GeneratorBody[Yield] } + // function * BindingIdentifier[?Yield](FormalParameters[Yield, GeneratorParameter]) { GeneratorBody[Yield] } + var isGenerator = asteriskToken !== undefined; return new FunctionDeclarationSyntax(parseNodeData, modifiers, @@ -2167,14 +2168,9 @@ module TypeScript.Parser { case SyntaxKind.SlashToken: case SyntaxKind.SlashEqualsToken: - // Note: if we see a / or /= token then we always consider this an expression. Why? - // Well, either that / or /= is actually a regular expression, in which case we're - // definitely an expression. Or, it's actually a divide. In which case, we *still* - // want to think of ourself as an expression. "But wait", you say. '/' doesn't - // start an expression. That's true. BUt like the above check for =>, for error - // tolerance, we will consider ourselves in an expression. We'll then parse out an - // missing identifier and then will consume the / token naturally as a binary - // expression. + // Note: if we see a / or /= token then we always consider this an expression. + // The / or /= will actually be the start of a regex that we will contextually + // rescan. // Simple epxressions. case SyntaxKind.SuperKeyword: @@ -2976,15 +2972,9 @@ module TypeScript.Parser { case SyntaxKind.SlashToken: case SyntaxKind.SlashEqualsToken: - // If we see a standalone / or /= and we're expecting a term, then try to reparse + // If we see a standalone / or /= and we're expecting an expression, then reparse // it as a regular expression. - var result = tryReparseDivideAsRegularExpression(); - - // If we get a result, then use it. Otherwise, create a missing identifier so - // that parsing can continue. Note: we do this even if 'force' is false. That's - // because we *do* want to consider a standalone / as an expression that should be - // returned from tryParseExpression even when 'force' is set to false. - return result || eatIdentifierToken(DiagnosticCode.Expression_expected); + return reparseDivideAsRegularExpression(); } if (!force) { @@ -2995,7 +2985,7 @@ module TypeScript.Parser { return eatIdentifierToken(DiagnosticCode.Expression_expected); } - function tryReparseDivideAsRegularExpression(): IPrimaryExpressionSyntax { + function reparseDivideAsRegularExpression(): IPrimaryExpressionSyntax { // If we see a / or /= token, then that may actually be the start of a regex in certain // contexts. @@ -3012,18 +3002,9 @@ module TypeScript.Parser { // Debug.assert(SyntaxFacts.isAnyDivideOrRegularExpressionToken(currentToken.kind)); var tokenKind = currentToken.kind; - if (tokenKind === SyntaxKind.SlashToken || tokenKind === SyntaxKind.SlashEqualsToken) { - // Still came back as a / or /=. This is not a regular expression literal. - return undefined; - } - else if (tokenKind === SyntaxKind.RegularExpressionLiteral) { - return consumeToken(currentToken); - } - else { - // Something *very* wrong happened. This is an internal parser fault that we need - // to figure out and fix. - throw Errors.invalidOperation(); - } + Debug.assert(tokenKind === SyntaxKind.RegularExpressionLiteral); + + return consumeToken(currentToken); } function parseTypeOfExpression(typeOfKeyword: ISyntaxToken): TypeOfExpressionSyntax { diff --git a/src/services/syntax/scanner.ts b/src/services/syntax/scanner.ts index 54f05890e3e..17b127eb6a9 100644 --- a/src/services/syntax/scanner.ts +++ b/src/services/syntax/scanner.ts @@ -281,7 +281,7 @@ module TypeScript.Scanner { LargeScannerToken.prototype.childCount = 0; export interface DiagnosticCallback { - (position: number, width: number, key: string, arguments: any[]): void; + (position: number, width: number, key: string, arguments?: any[]): void; } interface TokenInfo { @@ -1008,7 +1008,7 @@ module TypeScript.Scanner { while (true) { if (index === end) { // Hit the end of the file. - reportDiagnostic(end, 0, DiagnosticCode._0_expected, ["`"]); + reportDiagnostic(end, 0, DiagnosticCode.Unterminated_template_literal); break; } @@ -1144,10 +1144,7 @@ module TypeScript.Scanner { // term, and it sees one of these then it may restart us asking specifically if we could // scan out a regex. if (allowContextualToken) { - var result = tryScanRegularExpressionToken(); - if (result !== SyntaxKind.None) { - return result; - } + return scanRegularExpressionToken(); } if (str.charCodeAt(index) === CharacterCodes.equals) { @@ -1159,7 +1156,7 @@ module TypeScript.Scanner { } } - function tryScanRegularExpressionToken(): SyntaxKind { + function scanRegularExpressionToken(): SyntaxKind { var startIndex = index; var inEscape = false; @@ -1168,8 +1165,9 @@ module TypeScript.Scanner { var ch = str.charCodeAt(index); if (isNaN(ch) || isNewLineCharacter(ch)) { - index = startIndex; - return SyntaxKind.None; + // Hit the end of line, or end of the file. This is not a legal regex. + reportDiagnostic(index, 0, DiagnosticCode.Unterminated_regular_expression_literal); + break; } index++; @@ -1193,7 +1191,7 @@ module TypeScript.Scanner { continue; case CharacterCodes.closeBracket: - // If we ever hit a cloe bracket then we're now no longer in a character + // If we ever hit a close bracket then we're now no longer in a character // class. If we weren't in a character class to begin with, then this has // no effect. inCharacterClass = false; @@ -1219,7 +1217,7 @@ module TypeScript.Scanner { // TODO: The grammar says any identifier part is allowed here. Do we need to support // \u identifiers here? The existing typescript parser does not. - while (isIdentifierPartCharacter[str.charCodeAt(index)]) { + while (index < end && isIdentifierPartCharacter[str.charCodeAt(index)]) { index++; } @@ -1322,7 +1320,7 @@ module TypeScript.Scanner { break; } else if (isNaN(ch) || isNewLineCharacter(ch)) { - reportDiagnostic(Math.min(index, end), 1, DiagnosticCode.Missing_close_quote_character, undefined); + reportDiagnostic(index, 0, DiagnosticCode.Unterminated_string_literal); break; } else {