From 4d5fb9377bba1f45a940e10b0b7354fe7db2b301 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:04:44 -0400 Subject: [PATCH 1/5] revision: make handle_dotdot() interface less confusing There are two very subtle bits to the way we parse ".." (and "...") range operators: 1. In handle_dotdot_1(), we assume that the incoming arguments "dotdot" and "arg" are part of the same string, with the first digit of the range-operator blanked to a NUL. Then when we want the full name (e.g., to report an error), we replace the NUL with a dot to restore the original string. 2. In handle_dotdot(), we take in a const string, but then we modify it by overwriting the range operator with a NUL. This has worked OK in practice since we tend to pass in buffers that are actually writeable (including argv), but segfaults with something like: handle_revision_arg("..HEAD", &revs, 0, 0); On top of that, building with recent versions of glibc causes the compiler to complain, because it notices when we use strchr() or strstr() to launder away constness (basically detecting the possibility of the segfault above via the type system). Instead of munging the buffer, let's instead make a temporary copy of the left-hand side of the range operator. That avoids any const violations, and lets us pass around the parsed elements independently: the left-hand side, the right-hand side, the number of dots (via the "symmetric" flag), and the original full string for error messages. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/revision.c b/revision.c index 31808e3df0..f61262436f 100644 --- a/revision.c +++ b/revision.c @@ -2038,41 +2038,32 @@ static void prepare_show_merge(struct rev_info *revs) free(prune); } -static int dotdot_missing(const char *arg, char *dotdot, +static int dotdot_missing(const char *full_name, struct rev_info *revs, int symmetric) { if (revs->ignore_missing) return 0; - /* de-munge so we report the full argument */ - *dotdot = '.'; die(symmetric ? "Invalid symmetric difference expression %s" - : "Invalid revision range %s", arg); + : "Invalid revision range %s", full_name); } -static int handle_dotdot_1(const char *arg, char *dotdot, +static int handle_dotdot_1(const char *a_name, const char *b_name, + const char *full_name, int symmetric, struct rev_info *revs, int flags, int cant_be_filename, struct object_context *a_oc, struct object_context *b_oc) { - const char *a_name, *b_name; struct object_id a_oid, b_oid; struct object *a_obj, *b_obj; unsigned int a_flags, b_flags; - int symmetric = 0; unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); unsigned int oc_flags = GET_OID_COMMITTISH | GET_OID_RECORD_PATH; - a_name = arg; if (!*a_name) a_name = "HEAD"; - b_name = dotdot + 2; - if (*b_name == '.') { - symmetric = 1; - b_name++; - } if (!*b_name) b_name = "HEAD"; @@ -2081,15 +2072,13 @@ static int handle_dotdot_1(const char *arg, char *dotdot, return -1; if (!cant_be_filename) { - *dotdot = '.'; - verify_non_filename(revs->prefix, arg); - *dotdot = '\0'; + verify_non_filename(revs->prefix, full_name); } a_obj = parse_object(revs->repo, &a_oid); b_obj = parse_object(revs->repo, &b_oid); if (!a_obj || !b_obj) - return dotdot_missing(arg, dotdot, revs, symmetric); + return dotdot_missing(full_name, revs, symmetric); if (!symmetric) { /* just A..B */ @@ -2103,7 +2092,7 @@ static int handle_dotdot_1(const char *arg, char *dotdot, a = lookup_commit_reference(revs->repo, &a_obj->oid); b = lookup_commit_reference(revs->repo, &b_obj->oid); if (!a || !b) - return dotdot_missing(arg, dotdot, revs, symmetric); + return dotdot_missing(full_name, revs, symmetric); if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) { commit_list_free(exclude); @@ -2132,16 +2121,23 @@ static int handle_dotdot(const char *arg, int cant_be_filename) { struct object_context a_oc = {0}, b_oc = {0}; - char *dotdot = strstr(arg, ".."); + const char *dotdot = strstr(arg, ".."); + char *tmp; + int symmetric = 0; int ret; if (!dotdot) return -1; - *dotdot = '\0'; - ret = handle_dotdot_1(arg, dotdot, revs, flags, cant_be_filename, - &a_oc, &b_oc); - *dotdot = '.'; + tmp = xmemdupz(arg, dotdot - arg); + dotdot += 2; + if (*dotdot == '.') { + symmetric = 1; + dotdot++; + } + ret = handle_dotdot_1(tmp, dotdot, arg, symmetric, revs, flags, + cant_be_filename, &a_oc, &b_oc); + free(tmp); object_context_release(&a_oc); object_context_release(&b_oc); From 268a9caaf29f0269147dacbea2c8d439c505c5ee Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:05:25 -0400 Subject: [PATCH 2/5] rev-parse: simplify dotdot parsing The previous commit simplified the way that revision.c parses ".." and "..." range operators. But there's roughly similar code in rev-parse. This is less likely to trigger a segfault, as there is no library function which we'd pass a string literal to, but it still causes the compiler to complain about laundering away constness via strstr(). Let's give it the same treatment, copying the left-hand side of the range operator into its own string. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 01a62800e8..5da9537113 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -267,21 +267,20 @@ static int show_file(const char *arg, int output_prefix) static int try_difference(const char *arg) { - char *dotdot; + const char *dotdot; struct object_id start_oid; struct object_id end_oid; const char *end; const char *start; + char *to_free; int symmetric; static const char head_by_default[] = "HEAD"; if (!(dotdot = strstr(arg, ".."))) return 0; + start = to_free = xmemdupz(arg, dotdot - arg); end = dotdot + 2; - start = arg; symmetric = (*end == '.'); - - *dotdot = 0; end += symmetric; if (!*end) @@ -295,7 +294,7 @@ static int try_difference(const char *arg) * Just ".."? That is not a range but the * pathspec for the parent directory. */ - *dotdot = '.'; + free(to_free); return 0; } @@ -308,7 +307,7 @@ static int try_difference(const char *arg) a = lookup_commit_reference(the_repository, &start_oid); b = lookup_commit_reference(the_repository, &end_oid); if (!a || !b) { - *dotdot = '.'; + free(to_free); return 0; } if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) @@ -318,10 +317,10 @@ static int try_difference(const char *arg) show_rev(REVERSED, &commit->object.oid, NULL); } } - *dotdot = '.'; + free(to_free); return 1; } - *dotdot = '.'; + free(to_free); return 0; } From 22b985ef193a18ec0e6602ea1838e3290a351dd6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:13:18 -0400 Subject: [PATCH 3/5] revision: avoid writing to const string for parent marks We take in a "const char *", but may write a NUL into it when parsing parent marks like "foo^-", since we want to isolate "foo" as a string for further parsing. This is usually OK, as our "const" strings are often actually argv strings which are technically writeable, but we'd segfault with a string literal like: handle_revision_arg("HEAD^-", &revs, 0, 0); Similar to how we handled dotdot in a previous commit, we can avoid this by making a temporary copy of the left-hand side of the string. The cost should negligible compared to the rest of the parsing (like actually parsing commits to create their parent linked-lists). There is one slightly tricky thing, though. We parse some of the marks progressively, so that if we see "foo^!" for example, we'll strip that down to "foo" not just for calling add_parents_only(), but also for the rest of the function. That makes sense since we eventually want to pass "foo" to get_oid_with_context(). But it also means that we'll keep looking for other marks. In particular, "foo^-^!" is valid, though oddly "foo^!^-" would ignore the "^-". I'm not sure if this is a weird historical artifact of the implementation, or if there are important corner cases. So I've left the behavior unchanged. Each mark we find allocates a string with the mark stripped, which means we could allocate multiple times (and carry a free-able pointer for each to the end). But in practice we won't, because of the three marks, "^@" jumps immediately to the end without further parsing, and "^-^!" is nonsense that nobody would pass. So you'd get one allocation in general, and never more than two. Another obvious option would be to just copy "arg" up front and be OK with munging it. But that means we pay the cost even when we find no marks. We could make a single copy upon finding a mark and then munge, but that adds extra code to each site (checking whether somebody else allocated, and if not, adjusting our "mark" pointer to be relative to the copied string). I aimed for something that was clear and obvious, if a bit verbose. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/revision.c b/revision.c index f61262436f..fda405bf65 100644 --- a/revision.c +++ b/revision.c @@ -2147,7 +2147,10 @@ static int handle_dotdot(const char *arg, static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int flags, unsigned revarg_opt) { struct object_context oc = {0}; - char *mark; + const char *mark; + char *arg_minus_at = NULL; + char *arg_minus_excl = NULL; + char *arg_minus_dash = NULL; struct object *object; struct object_id oid; int local_flags; @@ -2174,18 +2177,17 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl mark = strstr(arg, "^@"); if (mark && !mark[2]) { - *mark = 0; - if (add_parents_only(revs, arg, flags, 0)) { + arg_minus_at = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_at, flags, 0)) { ret = 0; goto out; } - *mark = '^'; } mark = strstr(arg, "^!"); if (mark && !mark[2]) { - *mark = 0; - if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), 0)) - *mark = '^'; + arg_minus_excl = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_excl, flags ^ (UNINTERESTING | BOTTOM), 0)) + arg = arg_minus_excl; } mark = strstr(arg, "^-"); if (mark) { @@ -2199,9 +2201,9 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl } } - *mark = 0; - if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) - *mark = '^'; + arg_minus_dash = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_dash, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) + arg = arg_minus_dash; } local_flags = 0; @@ -2236,6 +2238,9 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl out: object_context_release(&oc); + free(arg_minus_at); + free(arg_minus_excl); + free(arg_minus_dash); return ret; } From 213b2138770d820bc28fde839f3e4df90a5d5d81 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:14:24 -0400 Subject: [PATCH 4/5] rev-parse: avoid writing to const string for parent marks The previous commit cleared up some const confusion in handling parent marks in revision.c, but we have roughly the same code duplicated in rev-parse. This one is much easier to fix, because the handling of the shortened string is all done in one place, after detecting any marks (but without shortening the string between marks). As a side note, I suspect this means that it behaves differently than the revision.c parser for weird stuff like "foo^!^@^-", but that is outside the scope of this patch. While we are here, let's also rename the variable "dotdot", which is totally misleading (and which we already fixed in revision.c long ago via f632dedd8d (handle_revision_arg: stop using "dotdot" as a generic pointer, 2017-05-19)). Doing that here makes the diff a little messier, but it also lets the compiler help us make sure we did not miss any stray mentions of the variable while we are changing its semantics. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 5da9537113..218b5f34d6 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -326,7 +326,7 @@ static int try_difference(const char *arg) static int try_parent_shorthands(const char *arg) { - char *dotdot; + const char *mark; struct object_id oid; struct commit *commit; struct commit_list *parents; @@ -334,38 +334,39 @@ static int try_parent_shorthands(const char *arg) int include_rev = 0; int include_parents = 0; int exclude_parent = 0; + char *to_free; - if ((dotdot = strstr(arg, "^!"))) { + if ((mark = strstr(arg, "^!"))) { include_rev = 1; - if (dotdot[2]) + if (mark[2]) return 0; - } else if ((dotdot = strstr(arg, "^@"))) { + } else if ((mark = strstr(arg, "^@"))) { include_parents = 1; - if (dotdot[2]) + if (mark[2]) return 0; - } else if ((dotdot = strstr(arg, "^-"))) { + } else if ((mark = strstr(arg, "^-"))) { include_rev = 1; exclude_parent = 1; - if (dotdot[2]) { + if (mark[2]) { char *end; - exclude_parent = strtoul(dotdot + 2, &end, 10); + exclude_parent = strtoul(mark + 2, &end, 10); if (*end != '\0' || !exclude_parent) return 0; } } else return 0; - *dotdot = 0; + arg = to_free = xmemdupz(arg, mark - arg); if (repo_get_oid_committish(the_repository, arg, &oid) || !(commit = lookup_commit_reference(the_repository, &oid))) { - *dotdot = '^'; + free(to_free); return 0; } if (exclude_parent && exclude_parent > commit_list_count(commit->parents)) { - *dotdot = '^'; + free(to_free); return 0; } @@ -386,7 +387,7 @@ static int try_parent_shorthands(const char *arg) free(name); } - *dotdot = '^'; + free(to_free); return 1; } From d385845d55e0e3a775fc47ac8d73a5ec41308db3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:23:20 -0400 Subject: [PATCH 5/5] config: store allocated string in non-const pointer When git-config matches a url, we copy the variable section name and store it in the "section" member of a urlmatch_config struct. That member is const, since the url-matcher will not touch it (and other callers really will have a const string). But that means that we have only a const pointer to our allocated string. We have to cast away the constness when we free it, and likewise when we assign NUL to tie off the "." separating the subsection and key. This latter happens implicitly via a strchr() call, but recent versions of glibc have added annotations that let the compiler detect that and complain. Let's keep our own "section" pointer for the non-const string, and then just point config.section at it. That avoids all of the casting, both explicit and implicit. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/config.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index 7c4857be62..cf4ba0f7cc 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -838,6 +838,7 @@ static int get_urlmatch(const struct config_location_options *opts, const char *var, const char *url) { int ret; + char *section; char *section_tail; struct config_display_options display_opts = *_display_opts; struct string_list_item *item; @@ -851,8 +852,8 @@ static int get_urlmatch(const struct config_location_options *opts, if (!url_normalize(url, &config.url)) die("%s", config.url.err); - config.section = xstrdup_tolower(var); - section_tail = strchr(config.section, '.'); + config.section = section = xstrdup_tolower(var); + section_tail = strchr(section, '.'); if (section_tail) { *section_tail = '\0'; config.key = section_tail + 1; @@ -886,7 +887,7 @@ static int get_urlmatch(const struct config_location_options *opts, string_list_clear(&values, 1); free(config.url.url); - free((void *)config.section); + free(section); return ret; }