mirror of
https://github.com/microsoft/vscode.git
synced 2026-04-11 03:35:04 -05:00
492 lines
11 KiB
TypeScript
492 lines
11 KiB
TypeScript
/*---------------------------------------------------------------------------------------------
|
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
|
*--------------------------------------------------------------------------------------------*/
|
|
'use strict';
|
|
|
|
import { JSONScanner, ScanError, SyntaxKind } from './scannerTypes';
|
|
|
|
/**
|
|
* Creates a JSON scanner on the given text.
|
|
* If ignoreTrivia is set, whitespaces or comments are ignored.
|
|
*/
|
|
export function createScanner(text: string, ignoreTrivia: boolean = false): JSONScanner {
|
|
|
|
const len = text.length;
|
|
let pos = 0,
|
|
value: string = '',
|
|
tokenOffset = 0,
|
|
token: SyntaxKind = SyntaxKind.Unknown,
|
|
lineNumber = 0,
|
|
lineStartOffset = 0,
|
|
tokenLineStartOffset = 0,
|
|
prevTokenLineStartOffset = 0,
|
|
scanError: ScanError = ScanError.None;
|
|
|
|
function scanHexDigits(count: number, exact?: boolean): number {
|
|
let digits = 0;
|
|
let value = 0;
|
|
while (digits < count || !exact) {
|
|
let ch = text.charCodeAt(pos);
|
|
if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
|
|
value = value * 16 + ch - CharacterCodes._0;
|
|
}
|
|
else if (ch >= CharacterCodes.A && ch <= CharacterCodes.F) {
|
|
value = value * 16 + ch - CharacterCodes.A + 10;
|
|
}
|
|
else if (ch >= CharacterCodes.a && ch <= CharacterCodes.f) {
|
|
value = value * 16 + ch - CharacterCodes.a + 10;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
pos++;
|
|
digits++;
|
|
}
|
|
if (digits < count) {
|
|
value = -1;
|
|
}
|
|
return value;
|
|
}
|
|
|
|
function setPosition(newPosition: number) {
|
|
pos = newPosition;
|
|
value = '';
|
|
tokenOffset = 0;
|
|
token = SyntaxKind.Unknown;
|
|
scanError = ScanError.None;
|
|
}
|
|
|
|
function scanNumber(): string {
|
|
let start = pos;
|
|
if (text.charCodeAt(pos) === CharacterCodes._0) {
|
|
pos++;
|
|
} else {
|
|
pos++;
|
|
while (pos < text.length && isDigit(text.charCodeAt(pos))) {
|
|
pos++;
|
|
}
|
|
}
|
|
if (pos < text.length && text.charCodeAt(pos) === CharacterCodes.dot) {
|
|
pos++;
|
|
if (pos < text.length && isDigit(text.charCodeAt(pos))) {
|
|
pos++;
|
|
while (pos < text.length && isDigit(text.charCodeAt(pos))) {
|
|
pos++;
|
|
}
|
|
} else {
|
|
scanError = ScanError.UnexpectedEndOfNumber;
|
|
return text.substring(start, pos);
|
|
}
|
|
}
|
|
let end = pos;
|
|
if (pos < text.length && (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e)) {
|
|
pos++;
|
|
if (pos < text.length && text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus) {
|
|
pos++;
|
|
}
|
|
if (pos < text.length && isDigit(text.charCodeAt(pos))) {
|
|
pos++;
|
|
while (pos < text.length && isDigit(text.charCodeAt(pos))) {
|
|
pos++;
|
|
}
|
|
end = pos;
|
|
} else {
|
|
scanError = ScanError.UnexpectedEndOfNumber;
|
|
}
|
|
}
|
|
return text.substring(start, end);
|
|
}
|
|
|
|
function scanString(): string {
|
|
|
|
let result = '',
|
|
start = pos;
|
|
|
|
while (true) {
|
|
if (pos >= len) {
|
|
result += text.substring(start, pos);
|
|
scanError = ScanError.UnexpectedEndOfString;
|
|
break;
|
|
}
|
|
const ch = text.charCodeAt(pos);
|
|
if (ch === CharacterCodes.doubleQuote) {
|
|
result += text.substring(start, pos);
|
|
pos++;
|
|
break;
|
|
}
|
|
if (ch === CharacterCodes.backslash) {
|
|
result += text.substring(start, pos);
|
|
pos++;
|
|
if (pos >= len) {
|
|
scanError = ScanError.UnexpectedEndOfString;
|
|
break;
|
|
}
|
|
const ch2 = text.charCodeAt(pos++);
|
|
switch (ch2) {
|
|
case CharacterCodes.doubleQuote:
|
|
result += '\"';
|
|
break;
|
|
case CharacterCodes.backslash:
|
|
result += '\\';
|
|
break;
|
|
case CharacterCodes.slash:
|
|
result += '/';
|
|
break;
|
|
case CharacterCodes.b:
|
|
result += '\b';
|
|
break;
|
|
case CharacterCodes.f:
|
|
result += '\f';
|
|
break;
|
|
case CharacterCodes.n:
|
|
result += '\n';
|
|
break;
|
|
case CharacterCodes.r:
|
|
result += '\r';
|
|
break;
|
|
case CharacterCodes.t:
|
|
result += '\t';
|
|
break;
|
|
case CharacterCodes.u:
|
|
const ch3 = scanHexDigits(4, true);
|
|
if (ch3 >= 0) {
|
|
result += String.fromCharCode(ch3);
|
|
} else {
|
|
scanError = ScanError.InvalidUnicode;
|
|
}
|
|
break;
|
|
default:
|
|
scanError = ScanError.InvalidEscapeCharacter;
|
|
}
|
|
start = pos;
|
|
continue;
|
|
}
|
|
if (ch >= 0 && ch <= 0x1f) {
|
|
if (isLineBreak(ch)) {
|
|
result += text.substring(start, pos);
|
|
scanError = ScanError.UnexpectedEndOfString;
|
|
break;
|
|
} else {
|
|
scanError = ScanError.InvalidCharacter;
|
|
// mark as error but continue with string
|
|
}
|
|
}
|
|
pos++;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
function scanNext(): SyntaxKind {
|
|
|
|
value = '';
|
|
scanError = ScanError.None;
|
|
|
|
tokenOffset = pos;
|
|
lineStartOffset = lineNumber;
|
|
prevTokenLineStartOffset = tokenLineStartOffset;
|
|
|
|
if (pos >= len) {
|
|
// at the end
|
|
tokenOffset = len;
|
|
return token = SyntaxKind.EOF;
|
|
}
|
|
|
|
let code = text.charCodeAt(pos);
|
|
// trivia: whitespace
|
|
if (isWhiteSpace(code)) {
|
|
do {
|
|
pos++;
|
|
value += String.fromCharCode(code);
|
|
code = text.charCodeAt(pos);
|
|
} while (isWhiteSpace(code));
|
|
|
|
return token = SyntaxKind.Trivia;
|
|
}
|
|
|
|
// trivia: newlines
|
|
if (isLineBreak(code)) {
|
|
pos++;
|
|
value += String.fromCharCode(code);
|
|
if (code === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
|
|
pos++;
|
|
value += '\n';
|
|
}
|
|
lineNumber++;
|
|
tokenLineStartOffset = pos;
|
|
return token = SyntaxKind.LineBreakTrivia;
|
|
}
|
|
|
|
switch (code) {
|
|
// tokens: []{}:,
|
|
case CharacterCodes.openBrace:
|
|
pos++;
|
|
return token = SyntaxKind.OpenBraceToken;
|
|
case CharacterCodes.closeBrace:
|
|
pos++;
|
|
return token = SyntaxKind.CloseBraceToken;
|
|
case CharacterCodes.openBracket:
|
|
pos++;
|
|
return token = SyntaxKind.OpenBracketToken;
|
|
case CharacterCodes.closeBracket:
|
|
pos++;
|
|
return token = SyntaxKind.CloseBracketToken;
|
|
case CharacterCodes.colon:
|
|
pos++;
|
|
return token = SyntaxKind.ColonToken;
|
|
case CharacterCodes.comma:
|
|
pos++;
|
|
return token = SyntaxKind.CommaToken;
|
|
|
|
// strings
|
|
case CharacterCodes.doubleQuote:
|
|
pos++;
|
|
value = scanString();
|
|
return token = SyntaxKind.StringLiteral;
|
|
|
|
// comments
|
|
case CharacterCodes.slash:
|
|
const start = pos - 1;
|
|
// Single-line comment
|
|
if (text.charCodeAt(pos + 1) === CharacterCodes.slash) {
|
|
pos += 2;
|
|
|
|
while (pos < len) {
|
|
if (isLineBreak(text.charCodeAt(pos))) {
|
|
break;
|
|
}
|
|
pos++;
|
|
|
|
}
|
|
value = text.substring(start, pos);
|
|
return token = SyntaxKind.LineCommentTrivia;
|
|
}
|
|
|
|
// Multi-line comment
|
|
if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) {
|
|
pos += 2;
|
|
|
|
const safeLength = len - 1; // For lookahead.
|
|
let commentClosed = false;
|
|
while (pos < safeLength) {
|
|
const ch = text.charCodeAt(pos);
|
|
|
|
if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) {
|
|
pos += 2;
|
|
commentClosed = true;
|
|
break;
|
|
}
|
|
|
|
pos++;
|
|
|
|
if (isLineBreak(ch)) {
|
|
if (ch === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
|
|
pos++;
|
|
}
|
|
|
|
lineNumber++;
|
|
tokenLineStartOffset = pos;
|
|
}
|
|
}
|
|
|
|
if (!commentClosed) {
|
|
pos++;
|
|
scanError = ScanError.UnexpectedEndOfComment;
|
|
}
|
|
|
|
value = text.substring(start, pos);
|
|
return token = SyntaxKind.BlockCommentTrivia;
|
|
}
|
|
// just a single slash
|
|
value += String.fromCharCode(code);
|
|
pos++;
|
|
return token = SyntaxKind.Unknown;
|
|
|
|
// numbers
|
|
case CharacterCodes.minus:
|
|
value += String.fromCharCode(code);
|
|
pos++;
|
|
if (pos === len || !isDigit(text.charCodeAt(pos))) {
|
|
return token = SyntaxKind.Unknown;
|
|
}
|
|
// found a minus, followed by a number so
|
|
// we fall through to proceed with scanning
|
|
// numbers
|
|
case CharacterCodes._0:
|
|
case CharacterCodes._1:
|
|
case CharacterCodes._2:
|
|
case CharacterCodes._3:
|
|
case CharacterCodes._4:
|
|
case CharacterCodes._5:
|
|
case CharacterCodes._6:
|
|
case CharacterCodes._7:
|
|
case CharacterCodes._8:
|
|
case CharacterCodes._9:
|
|
value += scanNumber();
|
|
return token = SyntaxKind.NumericLiteral;
|
|
// literals and unknown symbols
|
|
default:
|
|
// is a literal? Read the full word.
|
|
while (pos < len && isUnknownContentCharacter(code)) {
|
|
pos++;
|
|
code = text.charCodeAt(pos);
|
|
}
|
|
if (tokenOffset !== pos) {
|
|
value = text.substring(tokenOffset, pos);
|
|
// keywords: true, false, null
|
|
switch (value) {
|
|
case 'true': return token = SyntaxKind.TrueKeyword;
|
|
case 'false': return token = SyntaxKind.FalseKeyword;
|
|
case 'null': return token = SyntaxKind.NullKeyword;
|
|
}
|
|
return token = SyntaxKind.Unknown;
|
|
}
|
|
// some
|
|
value += String.fromCharCode(code);
|
|
pos++;
|
|
return token = SyntaxKind.Unknown;
|
|
}
|
|
}
|
|
|
|
function isUnknownContentCharacter(code: CharacterCodes) {
|
|
if (isWhiteSpace(code) || isLineBreak(code)) {
|
|
return false;
|
|
}
|
|
switch (code) {
|
|
case CharacterCodes.closeBrace:
|
|
case CharacterCodes.closeBracket:
|
|
case CharacterCodes.openBrace:
|
|
case CharacterCodes.openBracket:
|
|
case CharacterCodes.doubleQuote:
|
|
case CharacterCodes.colon:
|
|
case CharacterCodes.comma:
|
|
case CharacterCodes.slash:
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
function scanNextNonTrivia(): SyntaxKind {
|
|
let result: SyntaxKind;
|
|
do {
|
|
result = scanNext();
|
|
} while (result >= SyntaxKind.LineCommentTrivia && result <= SyntaxKind.Trivia);
|
|
return result;
|
|
}
|
|
|
|
return {
|
|
setPosition: setPosition,
|
|
getPosition: () => pos,
|
|
scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
|
|
getToken: () => token,
|
|
getTokenValue: () => value,
|
|
getTokenOffset: () => tokenOffset,
|
|
getTokenLength: () => pos - tokenOffset,
|
|
getTokenStartLine: () => lineStartOffset,
|
|
getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
|
|
getTokenError: () => scanError,
|
|
};
|
|
}
|
|
|
|
function isWhiteSpace(ch: number): boolean {
|
|
return ch === CharacterCodes.space || ch === CharacterCodes.tab;
|
|
}
|
|
|
|
function isLineBreak(ch: number): boolean {
|
|
return ch === CharacterCodes.lineFeed || ch === CharacterCodes.carriageReturn;
|
|
}
|
|
|
|
function isDigit(ch: number): boolean {
|
|
return ch >= CharacterCodes._0 && ch <= CharacterCodes._9;
|
|
}
|
|
|
|
const enum CharacterCodes {
|
|
lineFeed = 0x0A, // \n
|
|
carriageReturn = 0x0D, // \r
|
|
|
|
space = 0x0020, // " "
|
|
|
|
_0 = 0x30,
|
|
_1 = 0x31,
|
|
_2 = 0x32,
|
|
_3 = 0x33,
|
|
_4 = 0x34,
|
|
_5 = 0x35,
|
|
_6 = 0x36,
|
|
_7 = 0x37,
|
|
_8 = 0x38,
|
|
_9 = 0x39,
|
|
|
|
a = 0x61,
|
|
b = 0x62,
|
|
c = 0x63,
|
|
d = 0x64,
|
|
e = 0x65,
|
|
f = 0x66,
|
|
g = 0x67,
|
|
h = 0x68,
|
|
i = 0x69,
|
|
j = 0x6A,
|
|
k = 0x6B,
|
|
l = 0x6C,
|
|
m = 0x6D,
|
|
n = 0x6E,
|
|
o = 0x6F,
|
|
p = 0x70,
|
|
q = 0x71,
|
|
r = 0x72,
|
|
s = 0x73,
|
|
t = 0x74,
|
|
u = 0x75,
|
|
v = 0x76,
|
|
w = 0x77,
|
|
x = 0x78,
|
|
y = 0x79,
|
|
z = 0x7A,
|
|
|
|
A = 0x41,
|
|
B = 0x42,
|
|
C = 0x43,
|
|
D = 0x44,
|
|
E = 0x45,
|
|
F = 0x46,
|
|
G = 0x47,
|
|
H = 0x48,
|
|
I = 0x49,
|
|
J = 0x4A,
|
|
K = 0x4B,
|
|
L = 0x4C,
|
|
M = 0x4D,
|
|
N = 0x4E,
|
|
O = 0x4F,
|
|
P = 0x50,
|
|
Q = 0x51,
|
|
R = 0x52,
|
|
S = 0x53,
|
|
T = 0x54,
|
|
U = 0x55,
|
|
V = 0x56,
|
|
W = 0x57,
|
|
X = 0x58,
|
|
Y = 0x59,
|
|
Z = 0x5a,
|
|
|
|
asterisk = 0x2A, // *
|
|
backslash = 0x5C, // \
|
|
closeBrace = 0x7D, // }
|
|
closeBracket = 0x5D, // ]
|
|
colon = 0x3A, // :
|
|
comma = 0x2C, // ,
|
|
dot = 0x2E, // .
|
|
doubleQuote = 0x22, // "
|
|
minus = 0x2D, // -
|
|
openBrace = 0x7B, // {
|
|
openBracket = 0x5B, // [
|
|
plus = 0x2B, // +
|
|
slash = 0x2F, // /
|
|
|
|
formFeed = 0x0C, // \f
|
|
tab = 0x09, // \t
|
|
}
|