Have the parser give real positions for empty tokens.

Previously this was difficult because we didn't know where empty tokens would go due to the presence of skipped tokens. Thanks to the recent work I did to place skipped tokens on the *next* real token we hit, this became much simpler.
2026-05-19 20:37:00 -05:00 · 2014-11-20 16:19:19 -08:00
parent d11660c81d
commit b8a8c35f3f
3 changed files with 34 additions and 79 deletions
--- a/src/services/syntax/incrementalParser.ts
+++ b/src/services/syntax/incrementalParser.ts
@@ -592,6 +592,7 @@ module TypeScript.IncrementalParser {
            text: text,
            fileName: fileName,
            languageVersion: languageVersion,
+            absolutePosition: absolutePosition,
            currentNode: currentNode,
            currentToken: currentToken,
            currentContextualToken: currentContextualToken,
--- a/src/services/syntax/parser.ts
+++ b/src/services/syntax/parser.ts
@@ -45,6 +45,9 @@ module TypeScript.Parser {
        // but can affect the diagnostics produced while parsing.
        languageVersion: ts.ScriptTarget;

+        // The place in the source text that we're currently pointing at.
+        absolutePosition(): number;
+
        // The current syntax node the source is pointing at.  Only available in incremental settings.
        // The source can point at a node if that node doesn't intersect any of the text changes in
        // the file, and doesn't contain certain unacceptable constructs.  For example, if the node
@@ -541,13 +544,32 @@ module TypeScript.Parser {
            return eatToken(SyntaxKind.SemicolonToken);
        }

+        function createEmptyToken(kind: SyntaxKind): ISyntaxToken {
+            // The position of the empty token we're creating is not necessarily the position that 
+            // the parser is at now.  This is because we may have skipped over some existing tokens
+            // before finally deciding we needed a missing token.  For example, if you have:
+            //
+            //      Foo(a, #    <eof>
+            //
+            // We will need to create an empty token for the missing ")".  However, we will have
+            // skipped the "#" token, and thus will be right after the "#".  Because the "#" token
+            // will actually become *skipped* trivia on the *next* token we see, the close paren
+            // should not be considered to be after #, and should instead be after the ",".
+            //
+            // So, if we have any skipped tokens, then the position of the empty token should be
+            // the position of the first skipped token we have.  Otherwise it's just at the position
+            // of the parser.
+            var fullStart = _skippedTokens ? _skippedTokens[0].fullStart() : source.absolutePosition();
+            return Syntax.emptyToken(kind, fullStart);
+        }
+
        function createMissingToken(expectedKind: SyntaxKind, actual: ISyntaxToken, diagnosticCode?: string): ISyntaxToken {
            var diagnostic = getExpectedTokenDiagnostic(expectedKind, actual, diagnosticCode);
            addDiagnostic(diagnostic);

            // The missing token will be at the full start of the current token.  That way empty tokens
            // will always be between real tokens and not inside an actual token.
-            return Syntax.emptyToken(expectedKind);
+            return createEmptyToken(expectedKind);
        }

        function getExpectedTokenDiagnostic(expectedKind: SyntaxKind, actual?: ISyntaxToken, diagnosticCode?: string): Diagnostic {
@@ -2871,7 +2893,7 @@ module TypeScript.Parser {
                    addDiagnostic(diagnostic);

                    return new ArgumentListSyntax(parseNodeData, typeArgumentList,
-                        Syntax.emptyToken(SyntaxKind.OpenParenToken), <any>[], Syntax.emptyToken(SyntaxKind.CloseParenToken));
+                        createEmptyToken(SyntaxKind.OpenParenToken), <any>[], createEmptyToken(SyntaxKind.CloseParenToken));
                }
                else {
                    Debug.assert(token0.kind === SyntaxKind.OpenParenToken);
@@ -2931,7 +2953,7 @@ module TypeScript.Parser {
                    DiagnosticCode.new_T_cannot_be_used_to_create_an_array_Use_new_Array_T_instead, undefined);
                addDiagnostic(diagnostic);

-                return Syntax.emptyToken(SyntaxKind.IdentifierName);
+                return createEmptyToken(SyntaxKind.IdentifierName);
            }
            else {
                return allowInAnd(parseExpression);
@@ -3086,7 +3108,7 @@ module TypeScript.Parser {
            else {
                var diagnostic = getExpectedTokenDiagnostic(SyntaxKind.CloseBraceToken);
                addDiagnostic(diagnostic);
-                token = Syntax.emptyToken(SyntaxKind.TemplateEndToken);
+                token = createEmptyToken(SyntaxKind.TemplateEndToken);
            }

            return new TemplateClauseSyntax(parseNodeData, expression, token);
@@ -4219,7 +4241,7 @@ module TypeScript.Parser {
                // consume the '}' just fine.  So ASI doesn't apply.

                if (allowAutomaticSemicolonInsertion && canEatAutomaticSemicolon(/*allowWithoutNewline:*/ false)) {
-                    var semicolonToken = eatExplicitOrAutomaticSemicolon(/*allowWithoutNewline:*/ false) || Syntax.emptyToken(SyntaxKind.SemicolonToken);
+                    var semicolonToken = eatExplicitOrAutomaticSemicolon(/*allowWithoutNewline:*/ false) || createEmptyToken(SyntaxKind.SemicolonToken);
                    nodesAndSeparators.push(semicolonToken);
                    // Debug.assert(items.length % 2 === 0);
                    continue;
--- a/src/services/syntax/syntaxToken.ts
+++ b/src/services/syntax/syntaxToken.ts
@@ -290,8 +290,8 @@ module TypeScript.Syntax {
        return new RealizedToken(token.fullStart(), token.kind, token.isKeywordConvertedToIdentifier(), leadingTrivia, token.text());
    }

-    export function emptyToken(kind: SyntaxKind): ISyntaxToken {
-        return new EmptyToken(kind);
+    export function emptyToken(kind: SyntaxKind, fullStart: number): ISyntaxToken {
+        return new EmptyToken(kind, fullStart);
    }

    class EmptyToken implements ISyntaxToken {
@@ -300,17 +300,17 @@ module TypeScript.Syntax {
        public parent: ISyntaxElement;
        public childCount: number;

-        constructor(public kind: SyntaxKind) {
+        constructor(public kind: SyntaxKind, private _fullStart: number) {
        }

        public setFullStart(fullStart: number): void {
-            // An empty token is always at the -1 position.
+            this._fullStart = fullStart;
        }

        public childAt(index: number): ISyntaxElement { throw Errors.invalidOperation() }

        public clone(): ISyntaxToken {
-            return new EmptyToken(this.kind);
+            return new EmptyToken(this.kind, this._fullStart);
        }

        // Empty tokens are never incrementally reusable.
@@ -321,75 +321,7 @@ module TypeScript.Syntax {
        }

        public fullWidth() { return 0; }
-
-        private position(): number {
-            // It's hard for us to tell the position of an empty token at the eact time we create 
-            // it.  For example, we may have:
-            //
-            //      a / finally
-            //
-            // There will be a missing token detected after the forward slash, so it would be 
-            // tempting to set its position as the full-end of hte slash token. However, 
-            // immediately after that, the 'finally' token will be skipped and will be attached
-            // as skipped text to the forward slash.  This means the 'full-end' of the forward
-            // slash will change, and thus the empty token will now appear to be embedded inside
-            // another token.  This violates are rule that all tokens must only touch at the end,
-            // and makes enforcing invariants much harder.
-            //
-            // To address this we create the empty token with no known position, and then we 
-            // determine what it's position should be based on where it lies in the tree.  
-            // Specifically, we find the previous non-zero-width syntax element, and we consider
-            // the full-start of this token to be at the full-end of that element.
-
-            var previousElement = this.previousNonZeroWidthElement();
-            return !previousElement ? 0 : fullStart(previousElement) + fullWidth(previousElement);
-        }
-
-        private previousNonZeroWidthElement(): ISyntaxElement {
-            var current: ISyntaxElement = this;
-            while (true) {
-                var parent = current.parent;
-                if (parent === undefined) {
-                    Debug.assert(current.kind === SyntaxKind.SourceUnit, "We had a node without a parent that was not the root node!");
-
-                    // We walked all the way to the top, and never found a previous element.  This 
-                    // can happen with code like:
-                    //
-                    //      / b;
-                    //
-                    // We will have an empty identifier token as the first token in the tree.  In
-                    // this case, return undefined so that the position of the empty token will be 
-                    // considered to be 0.
-                    return undefined;
-                }
-
-                // Ok.  We have a parent.  First, find out which slot we're at in the parent.
-                for (var i = 0, n = childCount(parent); i < n; i++) {
-                    if (childAt(parent, i) === current) {
-                        break;
-                    }
-                }
-
-                Debug.assert(i !== n, "Could not find current element in parent's child list!");
-
-                // Walk backward from this element, looking for a non-zero-width sibling.
-                for (var j = i - 1; j >= 0; j--) {
-                    var sibling = childAt(parent, j);
-                    if (sibling && fullWidth(sibling) > 0) {
-                        return sibling;
-                    }
-                }
-
-                // We couldn't find a non-zero-width sibling.  We were either the first element, or
-                // all preceding elements are empty.  So, move up to our parent so we we can find
-                // its preceding sibling.
-                current = current.parent;
-            }
-        }
-
-        public fullStart(): number {
-            return this.position();
-        }
+        public fullStart(): number { return this._fullStart; }

        public text() { return ""; }
        public fullText(): string { return ""; }