Provide better error recovery when we encounter merge markers in the source.

Previously we would just treat each merge marker as trivia and then continue scanning and parsing like normal. This worked well in some scenarios, but fell down in others like:

```
class C {
    public foo() {
<<<<<<< HEAD
        this.bar();
    }
=======
        this.baz();
    }
>>>>>>> Branch

    public bar() { }
}
```

The problem stems from the previous approach trying to incorporate both branches of the merge into the final tree. In a case like this, that approach breaks down entirely. The parser ends up seeing the close curly in both included sections, and it considers the class finished. Then, it starts erroring when it encounters "public bar()". The fix is to only incorporate one of these sections into the tree. Specifically, we only include the first section. The second section is treated like trivia and does not affect the parse at all. To make the experience more pleasant we do *lexically* classify the second section. That way it does not appear as just plain black text in the editor. Instead, it will have appropriate lexical classifications for keywords, literals, comments, operators, punctuation, etc. However, any syntactic or semantic feature will not work in the second block due to this being trivia as far as any feature is concerned. This experience is still much better than what we had originally (where merge markers would absolutely destroy the parse tree). And it is better than what we checked in last week, which could easily create a borked tree for many types of merges. Now, almost all merges should still leave the tree in good shape. All LS features will work in the first section, and lexical classification will work in the second.
2026-05-17 21:06:50 -05:00 · 2014-12-18 19:18:13 -08:00
parent 828b33aae7
commit 48bef4698b
7 changed files with 270 additions and 163 deletions
--- a/src/compiler/scanner.ts
+++ b/src/compiler/scanner.ts
@@ -7,10 +7,6 @@ module ts {
        (message: DiagnosticMessage, length: number): void;
    }

-    export interface CommentCallback {
-        (pos: number, end: number): void;
-    }
-
    export interface Scanner {
        getStartPos(): number;
        getToken(): SyntaxKind;
@@ -396,8 +392,10 @@ module ts {
    var mergeConflictMarkerLength = "<<<<<<<".length;

    function isConflictMarkerTrivia(text: string, pos: number) {
+        Debug.assert(pos >= 0);
+
        // Conflict markers must be at the start of a line.
-        if (pos > 0 && isLineBreak(text.charCodeAt(pos - 1))) {
+        if (pos === 0 || isLineBreak(text.charCodeAt(pos - 1))) {
            var ch = text.charCodeAt(pos);

            if ((pos + mergeConflictMarkerLength) < text.length) {
@@ -415,10 +413,31 @@ module ts {
        return false;
    }

-    function scanConflictMarkerTrivia(text: string, pos: number) {
-        var len = text.length;
-        while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
-            pos++;
+    function scanConflictMarkerTrivia(text: string, pos: number, error?: ErrorCallback) {
+        if (error) {
+            error(Diagnostics.Merge_conflict_marker_encountered, mergeConflictMarkerLength);
+        }
+
+        var ch = text.charCodeAt(pos);
+        if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) {
+            var len = text.length;
+            while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
+                pos++;
+            }
+        }
+        else {
+            Debug.assert(ch === CharacterCodes.equals);
+            // Consume everything from the start of the mid-conlict marker to the start of the next
+            // end-conflict marker.
+            var len = text.length;
+            while (pos < len) {
+                var ch = text.charCodeAt(pos);
+                if (ch === CharacterCodes.greaterThan && isConflictMarkerTrivia(text, pos)) {
+                    break;
+                }
+
+                pos++;
+            }
        }

        return pos;
@@ -1057,8 +1076,7 @@ module ts {
                        return pos++, token = SyntaxKind.SemicolonToken;
                    case CharacterCodes.lessThan:
                        if (isConflictMarkerTrivia(text, pos)) {
-                            mergeConflictError();
-                            pos = scanConflictMarkerTrivia(text, pos);
+                            pos = scanConflictMarkerTrivia(text, pos, error);
                            if (skipTrivia) {
                                continue;
                            }
@@ -1079,8 +1097,7 @@ module ts {
                        return pos++, token = SyntaxKind.LessThanToken;
                    case CharacterCodes.equals:
                        if (isConflictMarkerTrivia(text, pos)) {
-                            mergeConflictError();
-                            pos = scanConflictMarkerTrivia(text, pos);
+                            pos = scanConflictMarkerTrivia(text, pos, error);
                            if (skipTrivia) {
                                continue;
                            }
@@ -1101,8 +1118,7 @@ module ts {
                        return pos++, token = SyntaxKind.EqualsToken;
                    case CharacterCodes.greaterThan:
                        if (isConflictMarkerTrivia(text, pos)) {
-                            mergeConflictError();
-                            pos = scanConflictMarkerTrivia(text, pos);
+                            pos = scanConflictMarkerTrivia(text, pos, error);
                            if (skipTrivia) {
                                continue;
                            }
@@ -1171,10 +1187,6 @@ module ts {
            }
        }

-        function mergeConflictError() {
-            error(Diagnostics.Merge_conflict_marker_encountered, mergeConflictMarkerLength);
-        }
-
        function reScanGreaterToken(): SyntaxKind {
            if (token === SyntaxKind.GreaterThanToken) {
                if (text.charCodeAt(pos) === CharacterCodes.greaterThan) {