mirror of
https://github.com/microsoft/vscode.git
synced 2026-04-12 09:50:11 -05:00
* Allow invoking simulationMain with alternative action input * Address review comments: rename CLI opts, extract pipeline, fix correctness issues - Rename CLI options with --train- prefix (--train-input, --train-strategy, --train-out, --train-row-offset, --train-worker) and document all options - Extract runInputPipeline/runInputPipelineParallel to test/pipeline/trainPipeline.ts - Preserve original row index through parse/replay/prompt pipeline to fix sample numbering drift when rows are filtered out - Fix parseSuggestedEdit: use JSON.parse for escaped text, handle missing delimiter - Fix line number regex to accept optional space after | (WithoutSpace format) - Clamp concurrency to >= 1, type samples as ISample[], wrap dispose in try/finally - Gate verbose logging in loadAndParseInput behind verbose flag - Use splitLines from existing utility instead of local duplicate * move nes-datagen to a subcommand * more code reuse around setting promptStrategy and model config * Address review: use ResponseFormat, Limiter, assertNever, and raw messages * minor refactor runPipeline * finalize * use POT instead of custom code * move files from script/ to test/pipeline/ --------- Co-authored-by: ulugbekna <ulugbekna@gmail.com>
333 lines
16 KiB
TypeScript
333 lines
16 KiB
TypeScript
/*---------------------------------------------------------------------------------------------
|
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
|
*--------------------------------------------------------------------------------------------*/
|
|
import minimist from 'minimist';
|
|
import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsComputer';
|
|
import { CacheMode } from './simulationContext';
|
|
|
|
/** Number of runs that are stored in baseline.json */
|
|
export const BASELINE_RUN_COUNT = 10;
|
|
|
|
export type NesDatagen = {
|
|
readonly input: string;
|
|
readonly output: string | undefined;
|
|
readonly rowOffset: number;
|
|
readonly workerMode: boolean;
|
|
}
|
|
|
|
export class SimulationOptions {
|
|
public static fromProcessArgs(): SimulationOptions {
|
|
return new SimulationOptions(process.argv);
|
|
}
|
|
|
|
public static fromArray(argv: readonly string[]): SimulationOptions {
|
|
return new SimulationOptions(argv);
|
|
}
|
|
|
|
private readonly argv: minimist.ParsedArgs;
|
|
|
|
public readonly help: boolean;
|
|
public readonly listModels: boolean;
|
|
public readonly listTests: boolean;
|
|
public readonly listSuites: boolean;
|
|
public readonly jsonOutput: boolean;
|
|
public readonly nRuns: number;
|
|
public readonly chatModel: string | undefined;
|
|
public readonly smartChatModel: string | undefined;
|
|
public readonly fastChatModel: string | undefined;
|
|
public readonly fastRewriteModel: string | undefined;
|
|
public readonly summarizeHistory: boolean;
|
|
public readonly swebenchPrompt: boolean;
|
|
public readonly embeddingType: EmbeddingType | undefined;
|
|
public readonly boost: boolean;
|
|
public readonly parallelism: number;
|
|
public readonly lmCacheMode: CacheMode;
|
|
public readonly modelCacheMode: CacheMode;
|
|
public readonly resourcesCacheMode: CacheMode;
|
|
public readonly cachePath: string | undefined;
|
|
public readonly externalBaseline: string | undefined;
|
|
public readonly externalScenarios: string | undefined;
|
|
public readonly output: string | undefined;
|
|
public readonly inline: boolean;
|
|
public readonly sidebar: boolean;
|
|
public readonly applyChatCodeBlocks: boolean;
|
|
public readonly stageCacheEntries: boolean;
|
|
public readonly ci: boolean;
|
|
public readonly gc: boolean;
|
|
public readonly externalCacheLayersPath: string | undefined;
|
|
public readonly verbose: number | boolean | undefined;
|
|
public readonly grep: string[] | string | undefined;
|
|
public readonly omitGrep: string | undefined;
|
|
public readonly heapSnapshots: boolean | string | undefined;
|
|
/** --scenario-test, --scenarioTest Run tests from provided scenario test file name */
|
|
public readonly scenarioTest: string | undefined;
|
|
public readonly isUpdateBaseline: boolean;
|
|
public readonly noFetch: boolean;
|
|
public readonly noCachePointer: boolean;
|
|
/**
|
|
* A label for the current simulation run, to be displayed in the UI for distinguishing between runs.
|
|
*/
|
|
public readonly label: string;
|
|
public readonly runServerPoweredNesProvider: boolean;
|
|
public readonly nes: 'external' | 'coffe' | undefined;
|
|
public readonly nesUrl: string | undefined;
|
|
public readonly nesApiKey: string | undefined;
|
|
|
|
public readonly nesDatagen: NesDatagen | undefined;
|
|
|
|
public readonly subcommand: 'nes-datagen' | undefined;
|
|
|
|
public readonly disabledTools: Set<string>;
|
|
|
|
/** If true, all tests are run in the extension host */
|
|
public readonly inExtensionHost: boolean;
|
|
/** Extensions to ensure are available in the extension host */
|
|
public readonly installExtensions: string[];
|
|
/** Whether to run headless (defaults to false) */
|
|
public readonly headless: boolean;
|
|
/** @internal Only run a single test number */
|
|
public readonly runNumber: number;
|
|
/** Explicit workspace URI to use for stest --in-extension-host */
|
|
public readonly useScenarioWorkspace: boolean;
|
|
|
|
/** If true, will try to use code search using our service. */
|
|
public readonly useExperimentalCodeSearchService: boolean;
|
|
|
|
public readonly configFile: string | undefined;
|
|
|
|
public readonly modelConfigFile: string | undefined;
|
|
|
|
protected constructor(processArgv: readonly string[]) {
|
|
const argv = minimist(processArgv.slice(2));
|
|
this.argv = argv;
|
|
this.help = boolean(argv['help'], false);
|
|
this.listModels = boolean(argv['list-models'], false);
|
|
this.listTests = boolean(argv['list-tests'], false);
|
|
this.listSuites = boolean(argv['list-suites'], false);
|
|
this.jsonOutput = boolean(argv['json'], false);
|
|
this.isUpdateBaseline = boolean(argv['update-baseline'] ?? argv['u'], false);
|
|
this.boost = boolean(argv['boost'], false);
|
|
const fetch = boolean(argv['fetch'], true);
|
|
this.noFetch = !fetch; // `--no-fetch` becomes argv[`fetch`] because of how minimist works
|
|
const cachePointer = boolean(argv['cache-pointer'], true);
|
|
this.noCachePointer = !cachePointer; // `--no-cache-pointer` becomes argv[`cache-pointer`] because of how minimist works
|
|
this.nRuns = typeof argv['n'] === 'number' ? argv['n'] : (this.isUpdateBaseline || argv['ci'] ? BASELINE_RUN_COUNT : 10);
|
|
this.chatModel = this.argv['model'];
|
|
this.smartChatModel = this.argv['smart-model'];
|
|
this.fastChatModel = this.argv['fast-model'];
|
|
this.fastRewriteModel = this.argv['fast-rewrite-model'];
|
|
this.summarizeHistory = boolean(argv['summarize-history'], true);
|
|
this.swebenchPrompt = boolean(argv['swebench-prompt'], false);
|
|
this.embeddingType = cliOptionsToWellKnownEmbeddingsType(this.argv['embedding-model']);
|
|
this.parallelism = this.argv['parallelism'] ?? this.argv['p'] ?? 20;
|
|
this.modelCacheMode = this.argv['skip-model-cache'] ? CacheMode.Disable : CacheMode.Default;
|
|
this.lmCacheMode = (
|
|
this.argv['skip-cache'] ? CacheMode.Disable
|
|
: (this.argv['require-cache'] ? CacheMode.Require : CacheMode.Default)
|
|
);
|
|
this.resourcesCacheMode = (
|
|
this.argv['skip-resources-cache'] ? CacheMode.Disable : CacheMode.Default
|
|
);
|
|
this.externalScenarios = this.argv['external-scenarios'];
|
|
this.externalBaseline = this.argv['external-baseline']; // must be set after `externalScenarios`
|
|
this.validateExternalBaseline();
|
|
this.output = this.argv['output'];
|
|
this.cachePath = this.argv['cache-location'];
|
|
this.inline = boolean(this.argv['inline'], false);
|
|
this.sidebar = boolean(this.argv['sidebar'], false);
|
|
this.applyChatCodeBlocks = boolean(this.argv['apply-chat-code-blocks'], false);
|
|
this.stageCacheEntries = boolean(this.argv['stage-cache-entries'], false);
|
|
this.ci = boolean(this.argv['ci'], false);
|
|
this.gc = boolean(this.argv['gc'], false);
|
|
this.externalCacheLayersPath = argv['external-cache-layers-path'];
|
|
this.verbose = this.argv['verbose'];
|
|
this.grep = argv['grep'];
|
|
this.omitGrep = argv['omit-grep'];
|
|
this.heapSnapshots = argv['heap-snapshots'];
|
|
this.scenarioTest = argv['scenarioTest'] ?? argv['scenario-test'];
|
|
this.label = argv['label'] ?? '';
|
|
|
|
this.inExtensionHost = boolean(argv['in-extension-host'], false);
|
|
this.installExtensions = argv['install-extension'] ? argv['install-extension'].split(',') : [];
|
|
this.headless = boolean(argv['headless'], true);
|
|
this.runNumber = Number(argv['run-number']) || 0;
|
|
|
|
this.runServerPoweredNesProvider = boolean(argv['runServerPoweredNesProvider'], false);
|
|
|
|
this.nes = SimulationOptions.validateNesArgument(argv['nes']);
|
|
|
|
this.nesUrl = argv['nes-url'];
|
|
// [SuppressMessage("Microsoft.Security", "CS002:SecretInNextLine", Justification="used for local simulation tests")]
|
|
this.nesApiKey = argv['nes-api-key'];
|
|
SimulationOptions.validateNesUrlOverride(this.nesUrl, this.nesApiKey);
|
|
|
|
this.disabledTools = argv['disable-tools'] ? new Set(argv['disable-tools'].split(',')) : new Set();
|
|
this.useScenarioWorkspace = boolean(argv['scenario-workspace-folder'], false);
|
|
|
|
this.useExperimentalCodeSearchService = boolean(argv['use-experimental-code-search-service'], false);
|
|
|
|
const isNesDatagen = (argv._ as string[]).includes('nes-datagen');
|
|
this.subcommand = isNesDatagen ? 'nes-datagen' : undefined;
|
|
this.nesDatagen = isNesDatagen && argv['input']
|
|
? {
|
|
input: argv['input'],
|
|
output: argv['out'],
|
|
rowOffset: typeof argv['row-offset'] === 'number' ? argv['row-offset'] : 0,
|
|
workerMode: boolean(argv['worker'], false),
|
|
}
|
|
: undefined;
|
|
|
|
this.configFile = argv['config-file'];
|
|
this.modelConfigFile = argv['model-config-file'];
|
|
}
|
|
|
|
public printHelp(): void {
|
|
console.log([
|
|
`Example usages: `,
|
|
` npm run simulate`,
|
|
` npm run simulate -- --external-scenarios=<path> --inline --output=<path>`,
|
|
` npm run simulate -- --external-scenarios=<path> --sidebar --output=<path>`,
|
|
` npm run simulate -- --external-scenarios=<path> --nes --output=<path>`,
|
|
` npm run simulate -- --update-baseline`,
|
|
``,
|
|
` -u, --update-baseline Updates scores in baseline.json if they change as a result of your changes to prompts sent to the model`,
|
|
` --external-scenarios Path to a directory containing scenarios to run`,
|
|
` --inline Run inline chat external scenarios`,
|
|
` --sidebar Run sidebar chat external scenarios`,
|
|
` --nes Run NES external scenarios`,
|
|
` --output Path to a directory where to generate output`,
|
|
` --n Run each scenario N times`,
|
|
` --ci Equivalent to --n=${BASELINE_RUN_COUNT} but throws if the baseline is not up-to-date`,
|
|
` --gc Used with --require-cache to compact cache layers into the baseline cache`,
|
|
` --external-cache-layers-path Used to specify the path to the external cache layers`,
|
|
` --grep Run a test which contains the passed-in string`,
|
|
` --omit-grep Run a test which does not contain the passed-in string`,
|
|
` --embedding-model Specify the model to use for the embedding endpoint (default: ada)`,
|
|
` Values: ada, text3small, text3large`,
|
|
` --list-models List available chat models`,
|
|
` --model Specify the model to use for the chat endpoint (use --list-models to see valid options)`,
|
|
` --smart-model Specify the model to use in place of the smarter slower model, i.e GPT 4o`,
|
|
` --fast-model Specify the model to use in place of the faster / less smart model, i.e GPT 4o mini`,
|
|
` --fast-rewrite-model [experimental] Specify the model to use for the fast rewrite endpoint`,
|
|
` -p, --parallelism [experimental] Run tests in parallel (default: 1)`,
|
|
` --skip-cache [experimental] Do not use the cache for language model requests`,
|
|
` --require-cache [experimental] Require cache hits, fail on cache misses`,
|
|
` --regenerate-cache [experimental] Fetch all responses and refresh the cache`,
|
|
` --skip-resources-cache [experimental] Do not use the cache for computed resources`,
|
|
` --skip-model-cache [experimental] Do not use the cache for model metadata`,
|
|
` --stage-cache-entries [experimental] Stage cache files that were used in current simulation run`,
|
|
` --list-tests List tests without running them`,
|
|
` --json Print output in JSONL format`,
|
|
` --verbose Print more information about test and assertion failures`,
|
|
` --scenario-test Run tests from provided scenario test file name, e.g., 'docComment.stest' or 'docComment.stest.ts' (--scenarioTest is supported but will be deprecated in future)`,
|
|
` --no-fetch Do not send requests to the model endpoint (uses cache but doesn't write to it) (useful to make sure prompts are unchanged by observing cache misses)`,
|
|
` --no-cache-pointer [experimental] Do not write files to outcome/`,
|
|
` --label A label for the current simulation run, to be displayed in the UI for distinguishing between runs`,
|
|
` --nes-url To override endpoint URL for NES (must be used with --nes-api-key)`,
|
|
` --nes-api-key API key for endpoint URL provided via NES (must be used with --nes-url)`,
|
|
` --runServerPoweredNesProvider Run stests against the http server powered NES provider (server must be run at port 8001)`,
|
|
` --disable-tools A comma-separated list of tools to disable`,
|
|
` --swebench-prompt Use the headless swebench prompt for agent mode`,
|
|
` --summarize-history Enable experimental conversation history summarization in agent mode`,
|
|
` --scenario-workspace-folder If true, runs the stest inline in the scenario's workspace folder`,
|
|
` --config-file Path to a JSON file containing configuration options`,
|
|
` --model-config-file Path to a JSON file containing model configuration options`,
|
|
``,
|
|
`Subcommands:`,
|
|
` nes-datagen Generate training data from alternative action recordings`,
|
|
` Run 'npm run simulate -- nes-datagen --help' for options`,
|
|
``,
|
|
].join('\n'));
|
|
}
|
|
|
|
public printTrainHelp(): void {
|
|
console.log([
|
|
`Usage: npm run simulate -- --config-file=<path> [global options] nes-datagen --input=<path> [options]`,
|
|
``,
|
|
`Generate training data by replaying alternative action recordings through the NES prompt pipeline.`,
|
|
`The prompting strategy is read from the model configuration in --config-file.`,
|
|
``,
|
|
`Options:`,
|
|
` --input Path to a JSON file with training data recordings (required)`,
|
|
` --out Output path for JSON file. Default: <input-path>_output.json`,
|
|
``,
|
|
`Global options (placed before 'nes-datagen'):`,
|
|
` --config-file Path to a JSON config file (required for nes-datagen)`,
|
|
` Must include "chat.advanced.inlineEdits.xtabProvider.modelConfiguration"`,
|
|
` with at least { "modelName", "promptingStrategy", "includeTagsInCurrentFile" }`,
|
|
` -p, --parallelism Number of parallel workers (default: 20)`,
|
|
` --verbose Print detailed progress and error information`,
|
|
` --help Show this help message`,
|
|
``,
|
|
`Examples:`,
|
|
` npm run simulate -- --config-file=config.json nes-datagen --input=data.json`,
|
|
` npm run simulate -- --config-file=config.json --parallelism=10 --verbose nes-datagen --input=data.json`,
|
|
``,
|
|
].join('\n'));
|
|
}
|
|
|
|
private validateExternalBaseline() {
|
|
if (this.externalBaseline && !this.externalScenarios) {
|
|
throw new Error('External scenarios must be provided for external baseline to work.');
|
|
}
|
|
}
|
|
|
|
private static validateNesArgument(nes: unknown): 'external' | 'coffe' | undefined {
|
|
if (nes === undefined || nes === null) {
|
|
return undefined;
|
|
}
|
|
if (typeof nes === 'boolean') { // this's for backward compat because previously it was possible to just pass `--nes` to run external stests against NES
|
|
return 'external';
|
|
}
|
|
if (typeof nes !== 'string') {
|
|
throw new Error(`--nes must be a string, but got: ${typeof nes}`);
|
|
}
|
|
switch (nes) {
|
|
case 'external':
|
|
case 'coffe':
|
|
return nes;
|
|
default:
|
|
throw new Error(`--nes can only be 'external' or 'coffe', but got: ${nes}`);
|
|
}
|
|
}
|
|
|
|
private static validateNesUrlOverride(nesUrl: string | undefined, nesApiKey: string | undefined): void {
|
|
if (nesUrl !== undefined && nesApiKey === undefined) {
|
|
throw new Error(`--nesApiKey must be provided when --nesUrl is set`);
|
|
}
|
|
if (nesUrl === undefined && nesApiKey !== undefined) {
|
|
throw new Error(`--nesUrl must be provided when --nesApiKey is set`);
|
|
}
|
|
}
|
|
}
|
|
|
|
function cliOptionsToWellKnownEmbeddingsType(model: string | undefined): EmbeddingType | undefined {
|
|
switch (model) {
|
|
case 'text3small':
|
|
case EmbeddingType.text3small_512.id:
|
|
return EmbeddingType.text3small_512;
|
|
|
|
case 'metis':
|
|
case EmbeddingType.metis_1024_I16_Binary.id:
|
|
return EmbeddingType.metis_1024_I16_Binary;
|
|
|
|
case undefined:
|
|
return undefined;
|
|
|
|
default:
|
|
throw new Error(`Unknown embedding model: ${model}`);
|
|
}
|
|
}
|
|
|
|
function boolean(value: any, defaultValue: boolean): boolean {
|
|
if (typeof value === 'undefined') {
|
|
return defaultValue;
|
|
}
|
|
if (value === 'false') {
|
|
// treat the string 'false' as false
|
|
return false;
|
|
}
|
|
return Boolean(value);
|
|
}
|