* Enable '--strictNullChecks'
* Fix API baselines
* Make sys.getEnvironmentVariable non-nullable
* Make properties optional instead of using `| undefined` in their type
* reportDiagnostics should be required
* Declare firstAccessor as non-nullable
* Make `some` a type guard
* Fix `getEnvironmentVariable` definition in tests
* Pretend transformFlags are always defined
* Fix one more use of sys.getEnvironmentVariable
* `requiredResponse` accepts undefined, remove assertions
* Mark optional properties as optional instead of using `| undefined`
* Mark optional properties as optional instead of using ` | undefined`
* Remove unnecessary null assertions
* Put the bang on the declaration instead of every use
* Make `createMapFromTemplate` require a parameter
* Mark `EmitResult.emittedFiles` and `EmitResult.sourceMaps` as optional
* Plumb through undefined in emitList and EmitExpressionList
* `ElementAccessExpression.argumentExpression` cannot be `undefined`
* Add overloads for `writeTokenText`
* Make `shouldWriteSeparatingLineTerminator` argument non-nullable
* Make `synthesizedNodeStartsOnNewLine` argument required
* `PropertyAssignment.initializer` cannot be undefined
* Use one `!` at the declaration site instead of on every use site
* Capture host in a constant and avoid null assertions
* Remove a few more unused assertions
* Update baselines
* Use parameter defaults
* Update baselines
* Fix lint
* Make Symbol#valueDeclaration and Symbol#declarations non-optional to reduce assertions
* Make Node#symbol and Type#symbol non-optional to reduce assertions
* Make `flags` non-nullable to reduce assertions
* Convert some asserts to type guards
* Make `isNonLocalAlias` a type guard
* Add overload for `getSymbolOfNode` for `Declaration`
* Some more `getSymbolOfNode` changes
* Push undefined suppression into `typeToTypeNodeHelper`
* `NodeBuilderContext.tracker` is never `undefined`
* Use `Debug.assertDefined`
* Remove unnecessary tag
* Mark `LiteralType.freshType` and `LiteralType.regularType` as required
namespace ts {
    export function createClassifier(): Classifier {
        const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);

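        /** Classifies a single line of text into spans, given the lexical state left over from the end of the previous line. */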
        function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
            return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
        }

        // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
        // we will be more conservative in order to avoid conflicting with the syntactic classifier.
        function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
            let token = SyntaxKind.Unknown;
            let lastNonTriviaToken = SyntaxKind.Unknown;

            // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
            // classification on template strings. Because of the context free nature of templates,
            // the only precise way to classify a template portion would be by propagating the stack across
            // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
            // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
            // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
            // Situations in which this fails are
            //  1) When template strings are nested across different lines:
            //          `hello ${ `world
            //          ` }`
            //
            //     Where on the second line, you will get the closing of a template,
            //     a closing curly, and a new template.
            //
            //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
            //          `hello ${ () => {
            //          return "world" } } `
            //
            //     Where on the second line, you will get the 'return' keyword,
            //     a string literal, and a template end consisting of '} } `'.
            const templateStack: SyntaxKind[] = [];

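            // E.g. if the previous line ended inside a double-quoted string, getPrefixFromLexState
            // returns a prefix of `"\` plus a newline, so the scanner below resumes in string-literal
            // state; 'offset' is later used to shift span starts back to positions in the original text.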
            const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
            text = prefix + text;
            const offset = prefix.length;
            if (pushTemplate) {
                templateStack.push(SyntaxKind.TemplateHead);
            }

            scanner.setText(text);

            let endOfLineState = EndOfLineState.None;
            const spans: number[] = [];

            // We can run into an unfortunate interaction between the lexical and syntactic classifier
            // when the user is typing something generic. Consider the case where the user types:
            //
            //      Foo<number
            //
            // From the lexical classifier's perspective, 'number' is a keyword, and so the word will
            // be classified as such. However, from the syntactic classifier's tree-based perspective
            // this is simply an expression with the identifier 'number' on the RHS of the less than
            // token. So the classification will go back to being an identifier. The moment the user
            // types again, number will become a keyword, then an identifier, etc. etc.
            //
            // To try to avoid this problem, we avoid classifying contextual keywords as keywords
            // when the user is potentially typing something generic. We just can't do a good enough
            // job at the lexical level, and so we'll leave it up to the syntactic classifier to make
            // the determination.
            //
            // In order to determine if the user is potentially typing something generic, we use a
            // weak heuristic where we track < and > tokens. It's a weak heuristic, but should
            // work well enough in practice.
            let angleBracketStack = 0;

            do {
                token = scanner.scan();
                if (!isTrivia(token)) {
                    handleToken();
                    lastNonTriviaToken = token;
                }
                const end = scanner.getTextPos();
                pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
                if (end >= text.length) {
                    const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
                    if (end !== undefined) {
                        endOfLineState = end;
                    }
                }
            } while (token !== SyntaxKind.EndOfFileToken);

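            // Re-classifies 'token' in place using the context tracked above: regex vs. division
            // after a slash, contextual type keywords inside possible generics, the template stack,
            // and keywords that follow a dot or another keyword.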
            function handleToken(): void {
                switch (token) {
                    case SyntaxKind.SlashToken:
                    case SyntaxKind.SlashEqualsToken:
                        if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                            token = SyntaxKind.RegularExpressionLiteral;
                        }
                        break;
                    case SyntaxKind.LessThanToken:
                        if (lastNonTriviaToken === SyntaxKind.Identifier) {
                            // Could be the start of something generic. Keep track of that by bumping
                            // up the current count of generic contexts we may be in.
                            angleBracketStack++;
                        }
                        break;
                    case SyntaxKind.GreaterThanToken:
                        if (angleBracketStack > 0) {
                            // If we think we're currently in something generic, then mark that the
                            // generic entity is complete.
                            angleBracketStack--;
                        }
                        break;
                    case SyntaxKind.AnyKeyword:
                    case SyntaxKind.StringKeyword:
                    case SyntaxKind.NumberKeyword:
                    case SyntaxKind.BooleanKeyword:
                    case SyntaxKind.SymbolKeyword:
                        if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
                            // If it looks like we could be in something generic, don't classify this
                            // as a keyword. We may just get overwritten by the syntactic classifier,
                            // causing a noisy experience for the user.
                            token = SyntaxKind.Identifier;
                        }
                        break;
                    case SyntaxKind.TemplateHead:
                        templateStack.push(token);
                        break;
                    case SyntaxKind.OpenBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            templateStack.push(token);
                        }
                        break;
                    case SyntaxKind.CloseBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            const lastTemplateStackToken = lastOrUndefined(templateStack);

                            if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
                                token = scanner.reScanTemplateToken();

                                // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
                                if (token === SyntaxKind.TemplateTail) {
                                    templateStack.pop();
                                }
                                else {
                                    Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle.");
                                }
                            }
                            else {
                                Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace");
                                templateStack.pop();
                            }
                        }
                        break;
                    default:
                        if (!isKeyword(token)) {
                            break;
                        }

                        if (lastNonTriviaToken === SyntaxKind.DotToken) {
                            token = SyntaxKind.Identifier;
                        }
                        else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
                            // We have two keywords in a row. Only treat the second as a keyword if
                            // it's a sequence that could legally occur in the language. Otherwise
                            // treat it as an identifier. This way, if someone writes "private var"
                            // we recognize that 'var' is actually an identifier here.
                            token = SyntaxKind.Identifier;
                        }
                }
            }

            return { endOfLineState, spans };
        }

        return { getClassificationsForLine, getEncodedLexicalClassifications };
    }

    /// We do not have full parser support to know when we should parse a regex or not.
    /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
    /// we have a series of divide operators. This list allows us to be more accurate by ruling out
    /// locations where a regexp cannot exist.
    const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([
        SyntaxKind.Identifier,
        SyntaxKind.StringLiteral,
        SyntaxKind.NumericLiteral,
        SyntaxKind.RegularExpressionLiteral,
        SyntaxKind.ThisKeyword,
        SyntaxKind.PlusPlusToken,
        SyntaxKind.MinusMinusToken,
        SyntaxKind.CloseParenToken,
        SyntaxKind.CloseBracketToken,
        SyntaxKind.CloseBraceToken,
        SyntaxKind.TrueKeyword,
        SyntaxKind.FalseKeyword,
    ], token => token, () => true);

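    /** Determines the EndOfLineState to carry over to the next line when the final token runs to the end of the line, or undefined if the line ends in a neutral state. */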
    function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
        switch (token) {
            case SyntaxKind.StringLiteral: {
                // Check to see if we finished up on a multiline string literal.
                if (!scanner.isUnterminated()) return undefined;

                const tokenText = scanner.getTokenText();
                const lastCharIndex = tokenText.length - 1;
                let numBackslashes = 0;
                while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
                    numBackslashes++;
                }

                // If we have an odd number of backslashes, then the multiline string is unclosed
                if ((numBackslashes & 1) === 0) return undefined;
                return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
            }
            case SyntaxKind.MultiLineCommentTrivia:
                // Check to see if the multiline comment was unclosed.
                return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
            default:
                if (isTemplateLiteralKind(token)) {
                    if (!scanner.isUnterminated()) {
                        return undefined;
                    }
                    switch (token) {
                        case SyntaxKind.TemplateTail:
                            return EndOfLineState.InTemplateMiddleOrTail;
                        case SyntaxKind.NoSubstitutionTemplateLiteral:
                            return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
                        default:
                            return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
                    }
                }
                return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
        }
    }

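    /** Appends a [start, length, type] triple to 'result', translating positions in the prefixed text back to positions in the original text and dropping whitespace classifications. */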
    function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
        if (classification === ClassificationType.whiteSpace) {
            // Don't bother with whitespace classifications. They're not needed.
            return;
        }

        if (start === 0 && offset > 0) {
            // We're classifying the first token, and this was a case where we prepended text.
            // We should consider the start of this token to be at the start of the original text.
            start += offset;
        }

        const length = end - start;
        if (length > 0) {
            // All our tokens are in relation to the augmented text. Move them back to be
            // relative to the original text.
            result.push(start - offset, length, classification);
        }
    }

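    /** Expands dense [start, length, type] triples into ClassificationInfo entries, synthesizing whitespace entries for the gaps between classified spans. */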
    function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
        const entries: ClassificationInfo[] = [];
        const dense = classifications.spans;
        let lastEnd = 0;

        for (let i = 0; i < dense.length; i += 3) {
            const start = dense[i];
            const length = dense[i + 1];
            const type = <ClassificationType>dense[i + 2];

            // Make a whitespace entry between the last item and this one.
            if (lastEnd >= 0) {
                const whitespaceLength = start - lastEnd;
                if (whitespaceLength > 0) {
                    entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
                }
            }

            entries.push({ length, classification: convertClassification(type) });
            lastEnd = start + length;
        }

        const whitespaceLength = text.length - lastEnd;
        if (whitespaceLength > 0) {
            entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
        }

        return { entries, finalLexState: classifications.endOfLineState };
    }

    function convertClassification(type: ClassificationType): TokenClass {
        switch (type) {
            case ClassificationType.comment: return TokenClass.Comment;
            case ClassificationType.keyword: return TokenClass.Keyword;
            case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
            case ClassificationType.operator: return TokenClass.Operator;
            case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
            case ClassificationType.whiteSpace: return TokenClass.Whitespace;
            case ClassificationType.punctuation: return TokenClass.Punctuation;
            case ClassificationType.identifier:
            case ClassificationType.className:
            case ClassificationType.enumName:
            case ClassificationType.interfaceName:
            case ClassificationType.moduleName:
            case ClassificationType.typeParameterName:
            case ClassificationType.typeAliasName:
            case ClassificationType.text:
            case ClassificationType.parameterName:
                return TokenClass.Identifier;
            default:
                return undefined!; // TODO: GH#18217 Debug.assertNever(type);
        }
    }

    /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
    function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
        if (!isAccessibilityModifier(keyword1)) {
            // Assume any other keyword combination is legal.
            // This can be refined in the future if there are more cases we want the classifier to be better at.
            return true;
        }
        switch (keyword2) {
            case SyntaxKind.GetKeyword:
            case SyntaxKind.SetKeyword:
            case SyntaxKind.ConstructorKeyword:
            case SyntaxKind.StaticKeyword:
                return true; // Allow things like "public get", "public constructor" and "public static".
            default:
                return false; // Any other keyword following "public" is actually an identifier, not a real keyword.
        }
    }

    function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
        // If we're in a string literal, then prepend: "\
        // (and a newline). That way when we lex we'll think we're still in a string literal.
        //
        // If we're in a multiline comment, then prepend: /*
        // (and a newline). That way when we lex we'll think we're still in a multiline comment.
        switch (lexState) {
            case EndOfLineState.InDoubleQuoteStringLiteral:
                return { prefix: "\"\\\n" };
            case EndOfLineState.InSingleQuoteStringLiteral:
                return { prefix: "'\\\n" };
            case EndOfLineState.InMultiLineCommentTrivia:
                return { prefix: "/*\n" };
            case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
                return { prefix: "`\n" };
            case EndOfLineState.InTemplateMiddleOrTail:
                return { prefix: "}\n", pushTemplate: true };
            case EndOfLineState.InTemplateSubstitutionPosition:
                return { prefix: "", pushTemplate: true };
            case EndOfLineState.None:
                return { prefix: "" };
            default:
                return Debug.assertNever(lexState);
        }
    }

    function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
        switch (token) {
            case SyntaxKind.AsteriskToken:
            case SyntaxKind.SlashToken:
            case SyntaxKind.PercentToken:
            case SyntaxKind.PlusToken:
            case SyntaxKind.MinusToken:
            case SyntaxKind.LessThanLessThanToken:
            case SyntaxKind.GreaterThanGreaterThanToken:
            case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
            case SyntaxKind.LessThanToken:
            case SyntaxKind.GreaterThanToken:
            case SyntaxKind.LessThanEqualsToken:
            case SyntaxKind.GreaterThanEqualsToken:
            case SyntaxKind.InstanceOfKeyword:
            case SyntaxKind.InKeyword:
            case SyntaxKind.AsKeyword:
            case SyntaxKind.EqualsEqualsToken:
            case SyntaxKind.ExclamationEqualsToken:
            case SyntaxKind.EqualsEqualsEqualsToken:
            case SyntaxKind.ExclamationEqualsEqualsToken:
            case SyntaxKind.AmpersandToken:
            case SyntaxKind.CaretToken:
            case SyntaxKind.BarToken:
            case SyntaxKind.AmpersandAmpersandToken:
            case SyntaxKind.BarBarToken:
            case SyntaxKind.BarEqualsToken:
            case SyntaxKind.AmpersandEqualsToken:
            case SyntaxKind.CaretEqualsToken:
            case SyntaxKind.LessThanLessThanEqualsToken:
            case SyntaxKind.GreaterThanGreaterThanEqualsToken:
            case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
            case SyntaxKind.PlusEqualsToken:
            case SyntaxKind.MinusEqualsToken:
            case SyntaxKind.AsteriskEqualsToken:
            case SyntaxKind.SlashEqualsToken:
            case SyntaxKind.PercentEqualsToken:
            case SyntaxKind.EqualsToken:
            case SyntaxKind.CommaToken:
                return true;
            default:
                return false;
        }
    }

    function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
        switch (token) {
            case SyntaxKind.PlusToken:
            case SyntaxKind.MinusToken:
            case SyntaxKind.TildeToken:
            case SyntaxKind.ExclamationToken:
            case SyntaxKind.PlusPlusToken:
            case SyntaxKind.MinusMinusToken:
                return true;
            default:
                return false;
        }
    }

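    /** Maps a token kind to its lexical classification, independent of any surrounding parse tree. */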
    function classFromKind(token: SyntaxKind): ClassificationType {
        if (isKeyword(token)) {
            return ClassificationType.keyword;
        }
        else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
            return ClassificationType.operator;
        }
        else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
            return ClassificationType.punctuation;
        }

        switch (token) {
            case SyntaxKind.NumericLiteral:
                return ClassificationType.numericLiteral;
            case SyntaxKind.StringLiteral:
                return ClassificationType.stringLiteral;
            case SyntaxKind.RegularExpressionLiteral:
                return ClassificationType.regularExpressionLiteral;
            case SyntaxKind.ConflictMarkerTrivia:
            case SyntaxKind.MultiLineCommentTrivia:
            case SyntaxKind.SingleLineCommentTrivia:
                return ClassificationType.comment;
            case SyntaxKind.WhitespaceTrivia:
            case SyntaxKind.NewLineTrivia:
                return ClassificationType.whiteSpace;
            case SyntaxKind.Identifier:
            default:
                if (isTemplateLiteralKind(token)) {
                    return ClassificationType.stringLiteral;
                }
                return ClassificationType.identifier;
        }
    }

    /* @internal */
    export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: UnderscoreEscapedMap<true>, span: TextSpan): ClassifiedSpan[] {
        return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
    }

    function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
        // We don't want to actually call back into our host on every node to find out if we've
        // been canceled. That would be an enormous amount of chattiness, along with all the
        // overhead of marshalling the data to/from the host. So instead we pick a few
        // reasonable node kinds to bother checking on. These node kinds represent high level
        // constructs that we would expect to see commonly, but just at a far less frequent
        // interval.
        //
        // For example, in checker.ts (around 750k) we only have around 600 of these constructs.
        // That means we're calling back into the host around every 1.2k of the file we process.
        // Lib.d.ts has similar numbers.
        switch (kind) {
            case SyntaxKind.ModuleDeclaration:
            case SyntaxKind.ClassDeclaration:
            case SyntaxKind.InterfaceDeclaration:
            case SyntaxKind.FunctionDeclaration:
                cancellationToken.throwIfCancellationRequested();
        }
    }

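    /** Walks the AST within 'span' and emits dense [start, length, type] triples for identifiers whose symbols are classifiable (classes, enums, type aliases, modules, and the like). */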
    /* @internal */
    export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: UnderscoreEscapedMap<true>, span: TextSpan): Classifications {
        const spans: number[] = [];
        sourceFile.forEachChild(function cb(node: Node): void {
            // Only walk into nodes that intersect the requested span.
            if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) {
                return;
            }

            checkForClassificationCancellation(cancellationToken, node.kind);
            // Only bother calling into the typechecker if this is an identifier that
            // could possibly resolve to a type name. This makes classification run
            // in a third of the time it would normally take.
            if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) {
                const symbol = typeChecker.getSymbolAtLocation(node);
                const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker);
                if (type) {
                    pushClassification(node.getStart(sourceFile), node.getEnd(), type);
                }
            }

            node.forEachChild(cb);
        });
        return { spans, endOfLineState: EndOfLineState.None };

        function pushClassification(start: number, end: number, type: ClassificationType): void {
            spans.push(start);
            spans.push(end - start);
            spans.push(type);
        }
    }

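    /** Derives a classification from a symbol's flags and the semantic meaning at the reference site, following aliases; returns undefined for symbols that aren't classifiable. */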
    function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined {
        const flags = symbol.getFlags();
        if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) {
            return undefined;
        }
        else if (flags & SymbolFlags.Class) {
            return ClassificationType.className;
        }
        else if (flags & SymbolFlags.Enum) {
            return ClassificationType.enumName;
        }
        else if (flags & SymbolFlags.TypeAlias) {
            return ClassificationType.typeAliasName;
        }
        else if (flags & SymbolFlags.Module) {
            // Only classify a module as such if
            //  - It appears in a namespace context.
            //  - There exists a module declaration which actually impacts the value side.
            return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined;
        }
        else if (flags & SymbolFlags.Alias) {
            return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker);
        }
        else if (meaningAtPosition & SemanticMeaning.Type) {
            return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined;
        }
        else {
            return undefined;
        }
    }

    /** Returns true if there exists a module that introduces entities on the value side. */
    function hasValueSideModule(symbol: Symbol): boolean {
        return some(symbol.declarations, declaration =>
            isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated);
    }

    function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames {
        switch (type) {
            case ClassificationType.comment: return ClassificationTypeNames.comment;
            case ClassificationType.identifier: return ClassificationTypeNames.identifier;
            case ClassificationType.keyword: return ClassificationTypeNames.keyword;
            case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral;
            case ClassificationType.operator: return ClassificationTypeNames.operator;
            case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral;
            case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace;
            case ClassificationType.text: return ClassificationTypeNames.text;
            case ClassificationType.punctuation: return ClassificationTypeNames.punctuation;
            case ClassificationType.className: return ClassificationTypeNames.className;
            case ClassificationType.enumName: return ClassificationTypeNames.enumName;
            case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName;
            case ClassificationType.moduleName: return ClassificationTypeNames.moduleName;
            case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName;
            case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName;
            case ClassificationType.parameterName: return ClassificationTypeNames.parameterName;
            case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName;
            case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName;
            case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName;
            case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName;
            case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute;
            case ClassificationType.jsxText: return ClassificationTypeNames.jsxText;
            case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue;
            default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type);
        }
    }

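    /** Converts dense [start, length, type] triples into ClassifiedSpan objects with human-readable classification names. */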
    function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
        Debug.assert(classifications.spans.length % 3 === 0);
        const dense = classifications.spans;
        const result: ClassifiedSpan[] = [];
        for (let i = 0; i < dense.length; i += 3) {
            result.push({
                textSpan: createTextSpan(dense[i], dense[i + 1]),
                classificationType: getClassificationTypeName(dense[i + 2])
            });
        }

        return result;
    }

    /* @internal */
    export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
        return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
    }

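    /** Classifies 'span' purely from the parse tree: tokens, comments (including JSDoc), JSX names, and merge-conflict markers, without consulting the type checker. */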
    /* @internal */
    export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications {
        const spanStart = span.start;
        const spanLength = span.length;

        // Make a scanner we can get trivia from.
        const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);
        const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);

        const result: number[] = [];
        processElement(sourceFile);

        return { spans: result, endOfLineState: EndOfLineState.None };

        function pushClassification(start: number, length: number, type: ClassificationType) {
            result.push(start);
            result.push(length);
            result.push(type);
        }

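        /** Scans and classifies the leading trivia of 'token' (comments, JSDoc, conflict markers) and returns the position where the token proper begins. */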
        function classifyLeadingTriviaAndGetTokenStart(token: Node): number {
            triviaScanner.setTextPos(token.pos);
            while (true) {
                const start = triviaScanner.getTextPos();
                // only bother scanning if we have something that could be trivia.
                if (!couldStartTrivia(sourceFile.text, start)) {
                    return start;
                }

                const kind = triviaScanner.scan();
                const end = triviaScanner.getTextPos();
                const width = end - start;

                // The moment we get something that isn't trivia, then stop processing.
                if (!isTrivia(kind)) {
                    return start;
                }

                switch (kind) {
                    case SyntaxKind.NewLineTrivia:
                    case SyntaxKind.WhitespaceTrivia:
                        // Don't bother with newlines/whitespace.
                        continue;

                    case SyntaxKind.SingleLineCommentTrivia:
                    case SyntaxKind.MultiLineCommentTrivia:
                        // Only bother with the trivia if it at least intersects the span of interest.
                        classifyComment(token, kind, start, width);

                        // Classifying a comment might cause us to reuse the trivia scanner
                        // (because of jsdoc comments). So after we classify the comment make
                        // sure we set the scanner position back to where it needs to be.
                        triviaScanner.setTextPos(end);
                        continue;

                    case SyntaxKind.ConflictMarkerTrivia:
                        const text = sourceFile.text;
                        const ch = text.charCodeAt(start);

                        // for the <<<<<<< and >>>>>>> markers, we just add them in as comments
                        // in the classification stream.
                        if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) {
                            pushClassification(start, width, ClassificationType.comment);
                            continue;
                        }

                        // for the ||||||| and ======= markers, add a comment for the first line,
                        // and then lex all subsequent lines up until the end of the conflict marker.
                        Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals);
                        classifyDisabledMergeCode(text, start, end);
                        break;

                    case SyntaxKind.ShebangTrivia:
                        // TODO: Maybe we should classify these.
                        break;

                    default:
                        Debug.assertNever(kind);
                }
            }
        }

        function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) {
            if (kind === SyntaxKind.MultiLineCommentTrivia) {
                // See if this is a doc comment. If so, we'll classify certain portions of it
                // specially.
                const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width);
                if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) {
                    // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]`
                    docCommentAndDiagnostics.jsDoc.parent = token as HasJSDoc;
                    classifyJSDocComment(docCommentAndDiagnostics.jsDoc);
                    return;
                }
            }

            // Simple comment. Just add as is.
            pushCommentRange(start, width);
        }

        function pushCommentRange(start: number, width: number) {
            pushClassification(start, width, ClassificationType.comment);
        }

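        /** Walks a JSDoc comment tag by tag, classifying '@' as punctuation, tag names as doc-comment tag names, and the surrounding text as plain comment. */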
        function classifyJSDocComment(docComment: JSDoc) {
            let pos = docComment.pos;

            if (docComment.tags) {
                for (const tag of docComment.tags) {
                    // As we walk through each tag, classify the portion of text from the end of
                    // the last tag (or the start of the entire doc comment) as 'comment'.
                    if (tag.pos !== pos) {
                        pushCommentRange(pos, tag.pos - pos);
                    }

                    pushClassification(tag.atToken.pos, tag.atToken.end - tag.atToken.pos, ClassificationType.punctuation); // "@"
                    pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param"

                    pos = tag.tagName.end;

                    switch (tag.kind) {
                        case SyntaxKind.JSDocParameterTag:
                            processJSDocParameterTag(<JSDocParameterTag>tag);
                            break;
                        case SyntaxKind.JSDocTemplateTag:
                            processJSDocTemplateTag(<JSDocTemplateTag>tag);
                            pos = tag.end;
                            break;
                        case SyntaxKind.JSDocTypeTag:
                            processElement((<JSDocTypeTag>tag).typeExpression);
                            pos = tag.end;
                            break;
                        case SyntaxKind.JSDocReturnTag:
                            processElement((<JSDocReturnTag>tag).typeExpression);
                            pos = tag.end;
                            break;
                    }
                }
            }

            if (pos !== docComment.end) {
                pushCommentRange(pos, docComment.end - pos);
            }

            return;

            function processJSDocParameterTag(tag: JSDocParameterTag) {
                if (tag.isNameFirst) {
                    pushCommentRange(pos, tag.name.pos - pos);
                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                    pos = tag.name.end;
                }

                if (tag.typeExpression) {
                    pushCommentRange(pos, tag.typeExpression.pos - pos);
                    processElement(tag.typeExpression);
                    pos = tag.typeExpression.end;
                }

                if (!tag.isNameFirst) {
                    pushCommentRange(pos, tag.name.pos - pos);
                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                    pos = tag.name.end;
                }
            }
        }

        function processJSDocTemplateTag(tag: JSDocTemplateTag) {
            for (const child of tag.getChildren()) {
                processElement(child);
            }
        }

        function classifyDisabledMergeCode(text: string, start: number, end: number) {
            // Classify the line that the ||||||| or ======= marker is on as a comment.
            // Then just lex all further tokens and add them to the result.
            let i: number;
            for (i = start; i < end; i++) {
                if (isLineBreak(text.charCodeAt(i))) {
                    break;
                }
            }
            pushClassification(start, i - start, ClassificationType.comment);
            mergeConflictScanner.setTextPos(i);

            while (mergeConflictScanner.getTextPos() < end) {
                classifyDisabledCodeToken();
            }
        }

        function classifyDisabledCodeToken() {
            const start = mergeConflictScanner.getTextPos();
            const tokenKind = mergeConflictScanner.scan();
            const end = mergeConflictScanner.getTextPos();

            const type = classifyTokenType(tokenKind);
            if (type) {
                pushClassification(start, end - start, type);
            }
        }

        /**
         * Returns true if the node should be treated as classified and no further processing is required.
         * False means the node is not classified and the traversal routine should recurse into its contents.
         */
        function tryClassifyNode(node: Node): boolean {
            if (isJSDoc(node)) {
                return true;
            }

            if (nodeIsMissing(node)) {
                return true;
            }

            const classifiedElementName = tryClassifyJsxElementName(node);
            if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) {
                return false;
            }

            const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node);

            const tokenWidth = node.end - tokenStart;
            Debug.assert(tokenWidth >= 0);
            if (tokenWidth > 0) {
                const type = classifiedElementName || classifyTokenType(node.kind, node);
                if (type) {
                    pushClassification(tokenStart, tokenWidth, type);
                }
            }

            return true;
        }

        function tryClassifyJsxElementName(token: Node): ClassificationType | undefined {
            switch (token.parent && token.parent.kind) {
                case SyntaxKind.JsxOpeningElement:
                    if ((<JsxOpeningElement>token.parent).tagName === token) {
                        return ClassificationType.jsxOpenTagName;
                    }
                    break;
                case SyntaxKind.JsxClosingElement:
                    if ((<JsxClosingElement>token.parent).tagName === token) {
                        return ClassificationType.jsxCloseTagName;
                    }
                    break;
                case SyntaxKind.JsxSelfClosingElement:
                    if ((<JsxSelfClosingElement>token.parent).tagName === token) {
                        return ClassificationType.jsxSelfClosingTagName;
                    }
                    break;
                case SyntaxKind.JsxAttribute:
                    if ((<JsxAttribute>token.parent).name === token) {
                        return ClassificationType.jsxAttribute;
                    }
                    break;
            }
            return undefined;
        }

        // For accurate classification, the actual token should be passed in. However, for
        // cases like 'disabled merge code' classification, we just get the token kind and
        // classify based on that instead.
        function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined {
            if (isKeyword(tokenKind)) {
                return ClassificationType.keyword;
            }

            // Special case `<` and `>`: If they appear in a generic context they are punctuation,
            // not operators.
            if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) {
                // If the node owning the token has a type argument list or type parameter list, then
                // we can effectively assume that a '<' and '>' belong to those lists.
                if (token && getTypeArgumentOrTypeParameterList(token.parent)) {
                    return ClassificationType.punctuation;
                }
            }

            if (isPunctuation(tokenKind)) {
                if (token) {
                    const parent = token.parent;
                    if (tokenKind === SyntaxKind.EqualsToken) {
                        // the '=' in a variable declaration is special cased here.
                        if (parent.kind === SyntaxKind.VariableDeclaration ||
                            parent.kind === SyntaxKind.PropertyDeclaration ||
                            parent.kind === SyntaxKind.Parameter ||
                            parent.kind === SyntaxKind.JsxAttribute) {
                            return ClassificationType.operator;
                        }
                    }

                    if (parent.kind === SyntaxKind.BinaryExpression ||
                        parent.kind === SyntaxKind.PrefixUnaryExpression ||
                        parent.kind === SyntaxKind.PostfixUnaryExpression ||
                        parent.kind === SyntaxKind.ConditionalExpression) {
                        return ClassificationType.operator;
                    }
                }

                return ClassificationType.punctuation;
            }
            else if (tokenKind === SyntaxKind.NumericLiteral) {
                return ClassificationType.numericLiteral;
            }
            else if (tokenKind === SyntaxKind.StringLiteral) {
                // TODO: GH#18217
                return token!.parent.kind === SyntaxKind.JsxAttribute ? ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral;
            }
            else if (tokenKind === SyntaxKind.RegularExpressionLiteral) {
                // TODO: we should get another classification type for these literals.
                return ClassificationType.stringLiteral;
            }
            else if (isTemplateLiteralKind(tokenKind)) {
                // TODO (drosen): we should *also* get another classification type for these literals.
                return ClassificationType.stringLiteral;
            }
            else if (tokenKind === SyntaxKind.JsxText) {
                return ClassificationType.jsxText;
            }
            else if (tokenKind === SyntaxKind.Identifier) {
                if (token) {
                    switch (token.parent.kind) {
                        case SyntaxKind.ClassDeclaration:
                            if ((<ClassDeclaration>token.parent).name === token) {
                                return ClassificationType.className;
                            }
                            return;
                        case SyntaxKind.TypeParameter:
                            if ((<TypeParameterDeclaration>token.parent).name === token) {
                                return ClassificationType.typeParameterName;
                            }
                            return;
                        case SyntaxKind.InterfaceDeclaration:
                            if ((<InterfaceDeclaration>token.parent).name === token) {
                                return ClassificationType.interfaceName;
                            }
                            return;
                        case SyntaxKind.EnumDeclaration:
                            if ((<EnumDeclaration>token.parent).name === token) {
                                return ClassificationType.enumName;
                            }
                            return;
                        case SyntaxKind.ModuleDeclaration:
                            if ((<ModuleDeclaration>token.parent).name === token) {
                                return ClassificationType.moduleName;
                            }
                            return;
                        case SyntaxKind.Parameter:
                            if ((<ParameterDeclaration>token.parent).name === token) {
                                return isThisIdentifier(token) ? ClassificationType.keyword : ClassificationType.parameterName;
                            }
                            return;
                    }
                }
                return ClassificationType.identifier;
            }
        }

        function processElement(element: Node | undefined) {
            if (!element) {
                return;
            }

            // Ignore nodes that don't intersect the original span to classify.
            if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) {
                checkForClassificationCancellation(cancellationToken, element.kind);

                for (const child of element.getChildren(sourceFile)) {
                    if (!tryClassifyNode(child)) {
                        // Recurse into our child nodes.
                        processElement(child);
                    }
                }
            }
        }
    }
}