Merge pull request #731 from Microsoft/acknowledgeTrivia

Use the 'skipTrivia' scanner flag for lexical classification
2026-02-15 03:23:08 -06:00 · 2014-09-24 13:39:50 -04:00 · 2014-09-24 13:39:50 -04:00 · dc9c3e168b
commit dc9c3e168b
parent 8be8e1f5be 25170ef5dd
3 changed files with 71 additions and 51 deletions
--- a/src/compiler/types.ts
+++ b/src/compiler/types.ts
@ -228,7 +228,9 @@ module ts {
        FirstPunctuation = OpenBraceToken,
        LastPunctuation = CaretEqualsToken,
        FirstToken = EndOfFileToken,
-        LastToken = StringKeyword
+        LastToken = StringKeyword,
+        FirstTriviaToken = SingleLineCommentTrivia,
+        LastTriviaToken = WhitespaceTrivia
    }

    export enum NodeFlags {
--- a/src/services/services.ts
+++ b/src/services/services.ts
@ -4378,13 +4378,13 @@ module ts {

    /// Classifier
    export function createClassifier(host: Logger): Classifier {
-        var scanner: Scanner;
-        var noRegexTable: boolean[];
+        var scanner = createScanner(ScriptTarget.ES5, /*skipTrivia*/ false);

        /// We do not have full parser support to know when we should parse a regex or not.
        /// If we considered every slash token to be the start of a regex, we would misclassify cases
        /// like "1/2/3", where we have a series of divide operators. This list allows us to be more
        /// accurate by ruling out locations where a regexp cannot exist.
+        var noRegexTable: boolean[];
        if (!noRegexTable) {
            noRegexTable = [];
            noRegexTable[SyntaxKind.Identifier] = true;
@ -4404,8 +4404,7 @@ module ts {
        function getClassificationsForLine(text: string, lexState: EndOfLineState): ClassificationResult {
            var offset = 0;
            var lastTokenOrCommentEnd = 0;
-            var lastToken = SyntaxKind.Unknown;
-            var inUnterminatedMultiLineComment = false;
+            var lastNonTriviaToken = SyntaxKind.Unknown;

            // If we're in a string literal, then prepend: "\
            // (and a newline).  That way when we lex we'll think we're still in a string literal.
@ -4427,27 +4426,31 @@ module ts {
                    break;
            }

+            scanner.setText(text);
+
            var result: ClassificationResult = {
                finalLexState: EndOfLineState.Start,
                entries: []
            };

-            scanner = createScanner(ScriptTarget.ES5, /*skipTrivia*/ true, text, onError, processComment);
-
+            
            var token = SyntaxKind.Unknown;
            do {
                token = scanner.scan();

-                if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastToken]) {
+                if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastNonTriviaToken]) {
                    if (scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                        token = SyntaxKind.RegularExpressionLiteral;
                    }
                }
-                else if (lastToken === SyntaxKind.DotToken) {
+                else if (lastNonTriviaToken === SyntaxKind.DotToken) {
                    token = SyntaxKind.Identifier;
                }

-                lastToken = token;
+                // Only remember the token if it was *not* trivia, so the checks above ignore whitespace and comments.
+                if (!(SyntaxKind.FirstTriviaToken <= token && token <= SyntaxKind.LastTriviaToken)) {
+                    lastNonTriviaToken = token;
+                }

                processToken();
            }
@ -4455,35 +4458,17 @@ module ts {

            return result;

-
-            function onError(message: DiagnosticMessage): void {
-                inUnterminatedMultiLineComment = message.key === Diagnostics.Asterisk_Slash_expected.key;
-            }
-
-            function processComment(start: number, end: number) {
-                // add Leading white spaces
-                addLeadingWhiteSpace(start, end);
-
-                // add the comment
-                addResult(end - start, TokenClass.Comment);
-            }
-
            function processToken(): void {
                var start = scanner.getTokenPos();
                var end = scanner.getTextPos();

-                // add Leading white spaces
-                addLeadingWhiteSpace(start, end);
-
                // add the token
                addResult(end - start, classFromKind(token));

                if (end >= text.length) {
                    // We're at the end.
-                    if (inUnterminatedMultiLineComment) {
-                        result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
-                    }
-                    else if (token === SyntaxKind.StringLiteral) {
+                    if (token === SyntaxKind.StringLiteral) {
+                        // Check to see if we finished up on a multiline string literal.
                        var tokenText = scanner.getTokenText();
                        if (tokenText.length > 0 && tokenText.charCodeAt(tokenText.length - 1) === CharacterCodes.backslash) {
                            var quoteChar = tokenText.charCodeAt(0);
@ -4492,18 +4477,18 @@ module ts {
                                : EndOfLineState.InSingleQuoteStringLiteral;
                        }
                    }
+                    else if (token === SyntaxKind.MultiLineCommentTrivia) {
+                        // Check to see if the multiline comment was unclosed.
+                        var tokenText = scanner.getTokenText()
+                        if (!(tokenText.length > 3 && // need to avoid catching '/*/'
+                            tokenText.charCodeAt(tokenText.length - 2) === CharacterCodes.asterisk &&
+                            tokenText.charCodeAt(tokenText.length - 1) === CharacterCodes.slash)) {
+                            result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
+                        }
+                    }
                }
            }

-            function addLeadingWhiteSpace(start: number, end: number): void {
-                if (start > lastTokenOrCommentEnd) {
-                    addResult(start - lastTokenOrCommentEnd, TokenClass.Whitespace);
-                }
-
-                // Remember the end of the last token
-                lastTokenOrCommentEnd = end;
-            }
-
            function addResult(length: number, classification: TokenClass): void {
                if (length > 0) {
                    // If this is the first classification we're adding to the list, then remove any 
@ -4596,6 +4581,11 @@ module ts {
                    return TokenClass.StringLiteral;
                case SyntaxKind.RegularExpressionLiteral:
                    return TokenClass.RegExpLiteral;
+                case SyntaxKind.MultiLineCommentTrivia:
+                case SyntaxKind.SingleLineCommentTrivia:
+                    return TokenClass.Comment;
+                case SyntaxKind.WhitespaceTrivia:
+                    return TokenClass.Whitespace;
                case SyntaxKind.Identifier:
                default:
                    return TokenClass.Identifier;
--- a/tests/cases/unittests/services/colorization.ts
+++ b/tests/cases/unittests/services/colorization.ts
@ -36,7 +36,7 @@ describe('Colorization', function () {
        }
        var finalEndOfLineState = classResult[classResult.length - 1];

-        assert.equal(position, code.length, "Expected accumilative length of all entries to match the length of the source. expected: " + code.length + ", but got: " + position);
+        assert.equal(position, code.length, "Expected cumulative length of all entries to match the length of the source. expected: " + code.length + ", but got: " + position);

        return {
            tuples: tuples,
@ -84,8 +84,8 @@ describe('Colorization', function () {
                var actualEntry = getEntryAtPosistion(result, actualEntryPosition);

                assert(actualEntry, "Could not find classification entry for '" + expectedEntry.value + "' at position: " + actualEntryPosition);
-                assert.equal(actualEntry.length, expectedEntry.value.length, "Classification class does not match expected.");
-                assert.equal(actualEntry.class, expectedEntry.class,  "Classification class does not match expected.");
+                assert.equal(actualEntry.class, expectedEntry.class, "Classification class does not match expected. Expected: " + ts.TokenClass[expectedEntry.class] + ", Actual: " + ts.TokenClass[actualEntry.class]);
+                assert.equal(actualEntry.length, expectedEntry.value.length, "Classification length does not match expected. Expected: " + ts.TokenClass[expectedEntry.value.length] + ", Actual: " + ts.TokenClass[actualEntry.length]);
            }
        }
    }
@ -105,7 +105,7 @@ describe('Colorization', function () {
                punctuation(";"));
        });

-        it("classifies correctelly a comment after a divide operator", function () {
+        it("correctly classifies a comment after a divide operator", function () {
            test("1 / 2 // comment",
                ts.EndOfLineState.Start,
                numberLiteral("1"),
@ -115,7 +115,7 @@ describe('Colorization', function () {
                comment("// comment"));
        });

-        it("classifies correctelly a literal after a divide operator", function () {
+        it("correctly classifies a literal after a divide operator", function () {
            test("1 / 2, 3 / 4",
                ts.EndOfLineState.Start,
                numberLiteral("1"),
@ -127,48 +127,76 @@ describe('Colorization', function () {
                operator(","));
        });

-        it("classifies correctelly an unterminated multi-line string", function () {
+        it("correctly classifies an unterminated multi-line string", function () {
            test("'line1\\",
                ts.EndOfLineState.Start,
                stringLiteral("'line1\\"),
                finalEndOfLineState(ts.EndOfLineState.InSingleQuoteStringLiteral));
        });

-        it("classifies correctelly the second line of an unterminated multi-line string", function () {
+        it("correctly classifies the second line of an unterminated multi-line string", function () {
            test("\\",
                ts.EndOfLineState.InDoubleQuoteStringLiteral,
                stringLiteral("\\"),
                finalEndOfLineState(ts.EndOfLineState.InDoubleQuoteStringLiteral));
        });

-        it("classifies correctelly the last line of a multi-line string", function () {
+        it("correctly classifies the last line of a multi-line string", function () {
            test("'",
                ts.EndOfLineState.InSingleQuoteStringLiteral,
                stringLiteral("'"),
                finalEndOfLineState(ts.EndOfLineState.Start));
        });

-        it("classifies correctelly an unterminated multiline comment", function () {
+        it("correctly classifies an unterminated multiline comment", function () {
            test("/*",
                ts.EndOfLineState.Start,
                comment("/*"),
                finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
        });

-        it("classifies correctelly an unterminated multiline comment with trailing space", function () {
+        it("correctly classifies the termination of a multiline comment", function () {
+            test("   */     ",
+                ts.EndOfLineState.InMultiLineCommentTrivia,
+                comment("   */"),
+                finalEndOfLineState(ts.EndOfLineState.Start));
+        });
+
+        it("correctly classifies the continuation of a multiline comment", function () {
+            test("LOREM IPSUM DOLOR   ",
+                ts.EndOfLineState.InMultiLineCommentTrivia,
+                comment("LOREM IPSUM DOLOR   "),
+                finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
+        });
+
+        it("correctly classifies an unterminated multiline comment on a line ending in '/*/'", function () {
+            test("   /*/",
+                ts.EndOfLineState.Start,
+                comment("/*/"),
+                finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
+        });
+
+        it("correctly classifies an unterminated multiline comment with trailing space", function () {
            test("/* ",
                ts.EndOfLineState.Start,
                comment("/* "),
                finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
        });

-        it("classifies correctelly a keyword after a dot", function () {
+        it("correctly classifies a keyword after a dot", function () {
            test("a.var",
                ts.EndOfLineState.Start,
                identifier("var"));
        });

-        it("classifies keyword after a dot on previous line", function () {
+        it("classifies a property access with whitespace around the dot", function () {
+            test("   x  .\tfoo ()",
+                ts.EndOfLineState.Start,
+                identifier("x"),
+                identifier("foo"));
+        });
+
+        it("classifies a keyword after a dot on previous line", function () {
            test("var",
                ts.EndOfLineState.Start,
                keyword("var"),