Merge pull request #731 from Microsoft/acknowledgeTrivia

Use the 'skipTrivia' scanner flag for lexical classification
This commit is contained in:
Daniel Rosenwasser
2014-09-24 13:39:50 -04:00
3 changed files with 71 additions and 51 deletions

View File

@@ -228,7 +228,9 @@ module ts {
FirstPunctuation = OpenBraceToken,
LastPunctuation = CaretEqualsToken,
FirstToken = EndOfFileToken,
LastToken = StringKeyword
LastToken = StringKeyword,
FirstTriviaToken = SingleLineCommentTrivia,
LastTriviaToken = WhitespaceTrivia
}
export enum NodeFlags {

View File

@@ -4378,13 +4378,13 @@ module ts {
/// Classifier
export function createClassifier(host: Logger): Classifier {
var scanner: Scanner;
var noRegexTable: boolean[];
var scanner = createScanner(ScriptTarget.ES5, /*skipTrivia*/ false);
/// We do not have full parser support to know when we should parse a regex or not.
/// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
/// we have a series of divide operators. This list allows us to be more accurate by ruling out
/// locations where a regexp cannot exist.
var noRegexTable: boolean[];
if (!noRegexTable) {
noRegexTable = [];
noRegexTable[SyntaxKind.Identifier] = true;
@@ -4404,8 +4404,7 @@ module ts {
function getClassificationsForLine(text: string, lexState: EndOfLineState): ClassificationResult {
var offset = 0;
var lastTokenOrCommentEnd = 0;
var lastToken = SyntaxKind.Unknown;
var inUnterminatedMultiLineComment = false;
var lastNonTriviaToken = SyntaxKind.Unknown;
// If we're in a string literal, then prepend: "\
// (and a newline). That way when we lex we'll think we're still in a string literal.
@@ -4427,27 +4426,31 @@ module ts {
break;
}
scanner.setText(text);
var result: ClassificationResult = {
finalLexState: EndOfLineState.Start,
entries: []
};
scanner = createScanner(ScriptTarget.ES5, /*skipTrivia*/ true, text, onError, processComment);
var token = SyntaxKind.Unknown;
do {
token = scanner.scan();
if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastToken]) {
if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastNonTriviaToken]) {
if (scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
token = SyntaxKind.RegularExpressionLiteral;
}
}
else if (lastToken === SyntaxKind.DotToken) {
else if (lastNonTriviaToken === SyntaxKind.DotToken) {
token = SyntaxKind.Identifier;
}
lastToken = token;
// Only recall the token if it was *not* trivia.
if (!(SyntaxKind.FirstTriviaToken <= token && token <= SyntaxKind.LastTriviaToken)) {
lastNonTriviaToken = token;
}
processToken();
}
@@ -4455,35 +4458,17 @@ module ts {
return result;
function onError(message: DiagnosticMessage): void {
inUnterminatedMultiLineComment = message.key === Diagnostics.Asterisk_Slash_expected.key;
}
function processComment(start: number, end: number) {
// add Leading white spaces
addLeadingWhiteSpace(start, end);
// add the comment
addResult(end - start, TokenClass.Comment);
}
function processToken(): void {
var start = scanner.getTokenPos();
var end = scanner.getTextPos();
// add Leading white spaces
addLeadingWhiteSpace(start, end);
// add the token
addResult(end - start, classFromKind(token));
if (end >= text.length) {
// We're at the end.
if (inUnterminatedMultiLineComment) {
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
}
else if (token === SyntaxKind.StringLiteral) {
if (token === SyntaxKind.StringLiteral) {
// Check to see if we finished up on a multiline string literal.
var tokenText = scanner.getTokenText();
if (tokenText.length > 0 && tokenText.charCodeAt(tokenText.length - 1) === CharacterCodes.backslash) {
var quoteChar = tokenText.charCodeAt(0);
@@ -4492,18 +4477,18 @@ module ts {
: EndOfLineState.InSingleQuoteStringLiteral;
}
}
else if (token === SyntaxKind.MultiLineCommentTrivia) {
// Check to see if the multiline comment was unclosed.
var tokenText = scanner.getTokenText()
if (!(tokenText.length > 3 && // need to avoid catching '/*/'
tokenText.charCodeAt(tokenText.length - 2) === CharacterCodes.asterisk &&
tokenText.charCodeAt(tokenText.length - 1) === CharacterCodes.slash)) {
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
}
}
}
}
/// Emits a Whitespace classification for any gap between the end of the previously
/// recorded token/comment and `start`, then records `end` as the new
/// "last token or comment end" position.
function addLeadingWhiteSpace(start: number, end: number): void {
    var gapLength = start - lastTokenOrCommentEnd;
    if (gapLength > 0) {
        // Text between the previous item and this one is reported as whitespace.
        addResult(gapLength, TokenClass.Whitespace);
    }

    // The next call measures its gap from the end of the current item.
    lastTokenOrCommentEnd = end;
}
function addResult(length: number, classification: TokenClass): void {
if (length > 0) {
// If this is the first classification we're adding to the list, then remove any
@@ -4596,6 +4581,11 @@ module ts {
return TokenClass.StringLiteral;
case SyntaxKind.RegularExpressionLiteral:
return TokenClass.RegExpLiteral;
case SyntaxKind.MultiLineCommentTrivia:
case SyntaxKind.SingleLineCommentTrivia:
return TokenClass.Comment;
case SyntaxKind.WhitespaceTrivia:
return TokenClass.Whitespace;
case SyntaxKind.Identifier:
default:
return TokenClass.Identifier;