parseopt: autocorrect mistyped subcommands

Try to autocorrect the mistyped mandatory subcommand before showing an
error and exiting. Subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL
are skipped.

Use standard Damerau-Levenshtein distance (weights 1, 1, 1, 1) to
establish a predictable, mathematically sound baseline.

Scale the allowed edit distance based on input length to prevent
false positives on short commands, following common practice for
fuzziness thresholds (e.g., Elasticsearch's AUTO fuzziness):
  - Length 0-2: 0 edits allowed
  - Length 3-5: 1 edit allowed
  - Length 6+:  2 edits allowed

Signed-off-by: Jiamu Sun <39@barroit.sh>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jiamu Sun
2026-03-17 00:36:20 +09:00
committed by Junio C Hamano
parent 7cd07f167d
commit be9df6de6e

View File

@@ -6,6 +6,8 @@
#include "strbuf.h"
#include "string-list.h"
#include "utf8.h"
#include "autocorrect.h"
#include "levenshtein.h"
static int disallow_abbreviated_options;
@@ -622,13 +624,77 @@ static int parse_subcommand(const char *arg, const struct option *options)
return -1;
}
static void find_subcommands(struct string_list *list,
const struct option *options)
{
for (; options->type != OPTION_END; options++) {
if (options->type == OPTION_SUBCOMMAND)
string_list_append(list, options->long_name);
}
}
static int similar_enough(const char *cmd, unsigned int edit)
{
size_t len = strlen(cmd);
unsigned int allowed = len < 3 ? 0 : len < 6 ? 1 : 2;
return edit <= allowed;
}
static const char *autocorrect_subcommand(const char *cmd,
struct string_list *cmds)
{
struct autocorrect autocorrect = { 0 };
unsigned int min = UINT_MAX;
unsigned int ties = 0;
struct string_list_item *cand;
struct string_list_item *best = NULL;
autocorrect_resolve(&autocorrect);
/*
* Builtin subcommands are small enough that printing them all via
* usage_with_options() is sufficient. Therefore, AUTOCORRECT_HINT
* acts like AUTOCORRECT_NEVER.
*/
if (autocorrect.mode == AUTOCORRECT_HINT ||
autocorrect.mode == AUTOCORRECT_NEVER)
return NULL;
for_each_string_list_item(cand, cmds) {
unsigned int edit = levenshtein(cmd, cand->string, 1, 1, 1, 1);
if (edit < min) {
min = edit;
best = cand;
ties = 0;
} else if (edit == min) {
ties++;
}
}
if (!ties && similar_enough(cmd, min)) {
fprintf_ln(stderr,
_("WARNING: You called a subcommand named '%s', which does not exist."),
cmd);
autocorrect_confirm(&autocorrect, best->string);
return best->string;
}
return NULL;
}
static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx,
const char *arg,
const struct option *options,
const char * const usagestr[])
{
int err = parse_subcommand(arg, options);
int err;
const char *assumed;
struct string_list cmds = STRING_LIST_INIT_NODUP;
err = parse_subcommand(arg, options);
if (!err)
return PARSE_OPT_SUBCOMMAND;
@@ -641,8 +707,17 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx,
if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL)
return PARSE_OPT_DONE;
error(_("unknown subcommand: `%s'"), arg);
usage_with_options(usagestr, options);
find_subcommands(&cmds, options);
assumed = autocorrect_subcommand(arg, &cmds);
if (!assumed) {
error(_("unknown subcommand: `%s'"), arg);
usage_with_options(usagestr, options);
}
string_list_clear(&cmds, 0);
parse_subcommand(assumed, options);
return PARSE_OPT_SUBCOMMAND;
}
static void check_typos(const char *arg, const struct option *options)