diff --git a/Documentation/config/promisor.adoc b/Documentation/config/promisor.adoc index b0fa43b839..f07a2e883b 100644 --- a/Documentation/config/promisor.adoc +++ b/Documentation/config/promisor.adoc @@ -32,24 +32,136 @@ variable is set to "true", and the "name" and "url" fields are always advertised regardless of this setting. promisor.acceptFromServer:: - If set to "all", a client will accept all the promisor remotes - a server might advertise using the "promisor-remote" - capability. If set to "knownName" the client will accept - promisor remotes which are already configured on the client - and have the same name as those advertised by the client. This - is not very secure, but could be used in a corporate setup - where servers and clients are trusted to not switch name and - URLs. If set to "knownUrl", the client will accept promisor - remotes which have both the same name and the same URL - configured on the client as the name and URL advertised by the - server. This is more secure than "all" or "knownName", so it - should be used if possible instead of those options. Default - is "none", which means no promisor remote advertised by a - server will be accepted. By accepting a promisor remote, the - client agrees that the server might omit objects that are - lazily fetchable from this promisor remote from its responses - to "fetch" and "clone" requests from the client. Name and URL - comparisons are case sensitive. See linkgit:gitprotocol-v2[5]. + Controls which promisor remotes advertised by a server (using the + "promisor-remote" protocol capability) a client will accept. By + accepting a promisor remote, the client agrees that the server + might omit objects that are lazily fetchable from this promisor + remote from its responses to "fetch" and "clone" requests. ++ +Note that this option does not cause new remotes to be automatically +created in the client's configuration. It only allows remotes which +are somehow already configured to be trusted for the current +operation, or their fields to be updated (if `promisor.storeFields` is +set and the remote already exists locally). To allow Git to +automatically create and persist new remotes from server +advertisements, use `promisor.acceptFromServerUrl`. ++ +The available options are: ++ +* `none` (default): No promisor remote advertised by a server will be + accepted. ++ +* `knownUrl`: The client will accept promisor remotes that are already + configured on the client and have both the same name and the same URL + as advertised by the server. This is more secure than `all` or + `knownName`, and should be used if possible instead of those options. ++ +* `knownName`: The client will accept promisor remotes that are already + configured on the client and have the same name as those advertised + by the server. This is not very secure, but could be used in a corporate + setup where servers and clients are trusted to not switch names and URLs. ++ +* `all`: The client will accept all the promisor remotes a server might + advertise. This is the least secure option and should only be used in + fully trusted environments. ++ +Name and URL comparisons are case-sensitive. See linkgit:gitprotocol-v2[5] +for protocol details. + +promisor.acceptFromServerUrl:: + A glob pattern to specify which server-advertised URLs a + client is allowed to act on. When a URL matches, the client + will accept the advertised remote as a promisor remote, may + automatically create a new remote configuration for it and may + automatically accept field updates (such as authentication + tokens) from the server, even if `promisor.acceptFromServer` + is set to `none` (the default). ++ +This option can appear multiple times in config files. An advertised +URL will be accepted if it matches _ANY_ glob pattern specified by +this option in _ANY_ config file read by Git. ++ +When both `promisor.acceptFromServer` and `promisor.acceptFromServerUrl` +are set, `promisor.acceptFromServerUrl` is consulted first and takes +precedence: if a matching pattern leads to acceptance (either by +auto-configuring an unknown remote or by accepting field updates for +a known remote whose URL matches both the local configuration and the +allowlist), the advertised remote is accepted regardless of the +`promisor.acceptFromServer` setting. If no pattern in +`promisor.acceptFromServerUrl` triggers acceptance, the decision is +left to `promisor.acceptFromServer`. ++ +Note however that, even when an advertised URL matches a pattern in +`promisor.acceptFromServerUrl`, an already-existing remote on the +client whose name matches the advertised name but whose configured URL +differs from the advertised one will _NOT_ be accepted through +`promisor.acceptFromServerUrl`. This prevents a server from silently +re-pointing an existing client-side remote at a different URL. (Such a +remote may still be accepted through `promisor.acceptFromServer=all` +or `=knownName`, which have their own, looser semantics; see the +documentation of that option.) ++ +Be _VERY_ careful with these patterns: `*` matches any sequence of +characters within the 'host' and 'path' parts of a URL (but cannot +cross part boundaries). An overly broad pattern is a major security +risk, as a matching URL allows a server to auto-configure new remotes +and to update fields (such as authentication tokens) on known remotes +without further confirmation. To minimize security risks, follow these +guidelines: ++ +-- +1. Start with a secure protocol scheme, like `https://` or `ssh://`. ++ +2. Only allow domain names or paths where you control and trust _ALL_ + the content. Be especially careful with shared hosting platforms + like `github.com` or `gitlab.com`. A broad pattern like + `https://gitlab.com/*` is dangerous because it trusts every + repository on the entire platform. Always restrict such patterns to + your specific organization or namespace (e.g., + `https://gitlab.com/your-org/*`). ++ +3. Never use globs at the end of domain names. For example, + `https://cdn.your-org.com/*` might be safe, but + `https://cdn.your-org.com*/*` is a major security risk because + the latter matches `https://cdn.your-org.com.hacker.net/repo`. ++ +4. Be careful using globs at the beginning of domain names. While the + code ensures a `*` in the host cannot cross into the path, a + pattern like `https://*.example.com/*` will still match any + subdomain. This is extremely dangerous on shared hosting platforms + (e.g., `https://*.github.io/*` trusts every user's site on the + entire platform). +-- ++ +Before matching, both the advertised URL and the pattern are +normalized: the scheme and host are lowercased, percent-encoded +characters are decoded where possible, and path segments like `..` +are resolved. The port must also match exactly (e.g., +`https://example.com:8080/*` will not match a URL advertised on +port 9999). The username and password components of the URL are +ignored during matching. Note that embedding credentials in URLs is +discouraged. Passing authentication tokens via the `token` field of +the `promisor-remote` capability is strongly preferred. ++ +The glob pattern can optionally be prefixed with a remote name and an +equals sign (e.g., `cdn=https://cdn.example.com/*`). If such a prefix +is provided, accepted remotes will be saved under that name. If no +such prefix is provided, a safe remote name will be automatically +generated by sanitizing the URL and prefixing it with +`promisor-auto-`. ++ +If a remote with the chosen name already exists but points to a +different URL, Git will append a numeric suffix (e.g., `-1`, `-2`) to +the name to prevent overwriting existing configurations. You should +make sure that this doesn't happen often though, as remotes will be +rejected if the numeric suffix increases too much. In all cases, the +original name advertised by the server is recorded in the +`remote..advertisedAs` configuration variable for tracing and +debugging purposes. ++ +For the security implications of accepting a promisor remote, see the +documentation of `promisor.acceptFromServer`. For details on the +protocol, see linkgit:gitprotocol-v2[5]. promisor.checkFields:: A comma or space separated list of additional remote related diff --git a/Documentation/config/remote.adoc b/Documentation/config/remote.adoc index eb9c8a3c48..6885905d90 100644 --- a/Documentation/config/remote.adoc +++ b/Documentation/config/remote.adoc @@ -91,6 +91,15 @@ remote..promisor:: When set to true, this remote will be used to fetch promisor objects. +remote..advertisedAs:: + When a promisor remote is automatically configured using + information advertised by a server through the + `promisor-remote` protocol capability (see + `promisor.acceptFromServerUrl`), the server's originally + advertised name is saved in this variable. This is for + information, tracing and debugging purposes. Users should not + typically modify or create such configuration entries. + remote..partialclonefilter:: The filter that will be applied when fetching from this promisor remote. Changing or clearing this value will only affect fetches for new commits. diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index befa697d21..2beb70595f 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -866,10 +866,11 @@ the server advertised, the client shouldn't advertise the On the server side, the "promisor.advertise" and "promisor.sendFields" configuration options can be used to control what it advertises. On -the client side, the "promisor.acceptFromServer" configuration option -can be used to control what it accepts, and the "promisor.storeFields" -option, to control what it stores. See the documentation of these -configuration options in linkgit:git-config[1] for more information. +the client side, the "promisor.acceptFromServer" and +"promisor.acceptFromServerUrl" configuration options can be used to +control what it accepts, and the "promisor.storeFields" option, to +control what it stores. See the documentation of these configuration +options in linkgit:git-config[1] for more information. Note that in the future it would be nice if the "promisor-remote" protocol capability could be used by the server, when responding to diff --git a/promisor-remote.c b/promisor-remote.c index f34856d499..43505d1e1a 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -12,7 +12,9 @@ #include "packfile.h" #include "environment.h" #include "url.h" +#include "urlmatch.h" #include "version.h" +#include "wildmatch.h" struct promisor_remote_config { struct promisor_remote *promisors; @@ -434,13 +436,14 @@ static struct string_list *fields_stored(void) * Struct for promisor remotes involved in the "promisor-remote" * protocol capability. * - * Except for "name", each in this struct and its - * should correspond (either on the client side or on the server side) - * to a "remote.." config variable set to where - * "" is a promisor remote name. + * Except for "name" and "local_name", each in this struct + * and its should correspond (either on the client side or on + * the server side) to a "remote.." config variable set + * to where "" is a promisor remote name. */ struct promisor_info { - const char *name; + const char *name; /* name the server advertised */ + const char *local_name; /* name used locally (may be auto-generated) */ const char *url; const char *filter; const char *token; @@ -449,6 +452,7 @@ struct promisor_info { static void promisor_info_free(struct promisor_info *p) { free((char *)p->name); + free((char *)p->local_name); free((char *)p->url); free((char *)p->filter); free((char *)p->token); @@ -462,6 +466,11 @@ static void promisor_info_list_clear(struct string_list *list) string_list_clear(list, 0); } +static const char *promisor_info_local_name(struct promisor_info *p) +{ + return p->local_name ? p->local_name : p->name; +} + static void set_one_field(struct promisor_info *p, const char *field, const char *value) { @@ -650,9 +659,351 @@ static bool has_control_char(const char *s) return false; } -static int should_accept_remote(enum accept_promisor accept, +struct allowed_url { + char *remote_name; + char *url_pattern; + struct url_info pattern_info; +}; + +static void allowed_url_free(void *util, const char *str UNUSED) +{ + struct allowed_url *allowed = util; + + if (!allowed) + return; + + /* Depending on prefix, free either remote_name or url_pattern */ + free(allowed->remote_name ? allowed->remote_name : allowed->url_pattern); + free(allowed->pattern_info.url); + free(allowed); +} + +static struct allowed_url *valid_accept_url(const char *url) +{ + char *dup, *p; + struct allowed_url *allowed; + + if (!url) + return NULL; + + dup = xstrdup(url); + p = strchr(dup, '='); + if (p) { + *p = '\0'; + if (!valid_remote_name(dup)) { + warning(_("invalid remote name '%s' before '=' sign " + "in '%s' from promisor.acceptFromServerUrl config"), + dup, url); + free(dup); + return NULL; + } + p++; + } else { + p = dup; + } + + if (has_control_char(p)) { + warning(_("invalid url pattern '%s' " + "in '%s' from promisor.acceptFromServerUrl config"), p, url); + free(dup); + return NULL; + } + + allowed = xmalloc(sizeof(*allowed)); + allowed->remote_name = (p == dup) ? NULL : dup; + allowed->url_pattern = p; + allowed->pattern_info.url = url_normalize_pattern(p, &allowed->pattern_info); + if (!allowed->pattern_info.url) { + warning(_("invalid url pattern '%s' " + "in '%s' from promisor.acceptFromServerUrl config"), p, url); + free(dup); + free(allowed); + return NULL; + } + + return allowed; +} + +static void load_accept_from_server_url(struct repository *repo, + struct string_list *accept_urls) +{ + const struct string_list *config_urls; + + if (!repo_config_get_string_multi(repo, "promisor.acceptfromserverurl", &config_urls)) { + struct string_list_item *item; + + for_each_string_list_item(item, config_urls) { + struct allowed_url *allowed = valid_accept_url(item->string); + if (allowed) { + struct string_list_item *new; + new = string_list_append(accept_urls, item->string); + new->util = allowed; + } + } + } +} + +static bool match_pattern_url(const char *pat, size_t pat_len, + const char *url, size_t url_len) +{ + char *p_str = xstrndup(pat, pat_len); + char *u_str = xstrndup(url, url_len); + bool res = !wildmatch(p_str, u_str, 0); + + free(p_str); + free(u_str); + + return res; +} + +static bool match_one_url(const struct url_info *pi, const struct url_info *ui) +{ + const char *pat = pi->url; + const char *url = ui->url; + + /* + * Schemes must match exactly. They are case-folded by + * url_normalize(), so strncmp() suffices. + */ + if (pi->scheme_len != ui->scheme_len || strncmp(pat, url, pi->scheme_len)) + return false; + + /* + * Ports must match exactly. url_normalize() strips default + * ports (like 443 for https), so length and content + * comparisons are sufficient. + */ + if (pi->port_len != ui->port_len || + strncmp(pat + pi->port_off, url + ui->port_off, pi->port_len)) + return false; + + /* + * Match host and path separately to prevent a '*' in the host + * portion of the pattern from matching across the '/' + * boundary into the path. + */ + + return match_pattern_url(pat + pi->host_off, pi->host_len, + url + ui->host_off, ui->host_len) && + match_pattern_url(pat + pi->path_off, pi->path_len, + url + ui->path_off, ui->path_len); +} + +static struct allowed_url *url_matches_accept_list( + struct string_list *accept_urls, const char *url) +{ + struct string_list_item *item; + struct url_info url_info; + + url_info.url = url_normalize(url, &url_info); + + if (!url_info.url) + return NULL; + + for_each_string_list_item(item, accept_urls) { + struct allowed_url *allowed = item->util; + + if (match_one_url(&allowed->pattern_info, &url_info)) { + free(url_info.url); + return allowed; + } + } + + free(url_info.url); + return NULL; +} + +/* + * Sanitize the buffer to make it a valid remote name coming from the + * server by: + * + * - replacing any non alphanumeric character with a '-' + * - stripping any leading '-', + * - condensing multiple '-' into one, + * - prepending "promisor-auto-", + * - validating the result. + */ +static int sanitize_remote_name(struct strbuf *buf, const char *url) +{ + char prev = '-'; + for (size_t i = 0; i < buf->len; ) { + if (!isalnum(buf->buf[i])) + buf->buf[i] = '-'; + if (prev == '-' && buf->buf[i] == '-') { + strbuf_remove(buf, i, 1); + } else { + prev = buf->buf[i]; + i++; + } + } + + strbuf_strip_suffix(buf, "-"); + + if (!buf->len) { + warning(_("couldn't generate a valid remote name from " + "advertised url '%s', ignoring this remote"), url); + return -1; + } + + strbuf_insertstr(buf, 0, "promisor-auto-"); + + if (!valid_remote_name(buf->buf)) { + warning(_("generated remote name '%s' from advertised url '%s' " + "is invalid, ignoring this remote"), buf->buf, url); + return -1; + } + + return 0; +} + +static char *promisor_remote_name_from_url(const char *url) +{ + struct url_info url_info = { 0 }; + char *normalized = url_normalize(url, &url_info); + struct strbuf buf = STRBUF_INIT; + + if (!normalized) { + warning(_("couldn't normalize advertised url '%s', " + "ignoring this remote"), url); + return NULL; + } + + if (url_info.host_len) { + strbuf_add(&buf, normalized + url_info.host_off, url_info.host_len); + strbuf_addch(&buf, '-'); + } + + if (url_info.port_len) { + strbuf_add(&buf, normalized + url_info.port_off, url_info.port_len); + strbuf_addch(&buf, '-'); + } + + if (url_info.path_len) { + strbuf_add(&buf, normalized + url_info.path_off, url_info.path_len); + strbuf_trim_trailing_dir_sep(&buf); + strbuf_strip_suffix(&buf, ".git"); + } + + free(normalized); + + if (sanitize_remote_name(&buf, url)) { + strbuf_release(&buf); + return NULL; + } + + return strbuf_detach(&buf, NULL); +} + +static void configure_auto_promisor_remote(struct repository *repo, + const char *name, + const char *url, + const char *advertised_as, + bool reuse) +{ + char *key; + + if (!reuse) { + fprintf(stderr, _("Auto-creating promisor remote '%s' for URL '%s'\n"), + name, url); + + key = xstrfmt("remote.%s.url", name); + repo_config_set_gently(repo, key, url); + free(key); + } + + /* NB: when reusing, this promotes an existing non-promisor remote */ + key = xstrfmt("remote.%s.promisor", name); + repo_config_set_gently(repo, key, "true"); + free(key); + + if (advertised_as) { + key = xstrfmt("remote.%s.advertisedAs", name); + repo_config_set_gently(repo, key, advertised_as); + free(key); + } +} + +#define MAX_REMOTES_WITH_SIMILAR_NAMES 20 + +/* Return the allocated local name, or NULL on failure */ +static char *handle_matching_allowed_url(struct repository *repo, + char *allowed_name, + const char *remote_url, + const char *remote_name) +{ + char *name; + char *basename = allowed_name ? + xstrdup(allowed_name) : + promisor_remote_name_from_url(remote_url); + int i = 0; + bool reuse = false; + + if (!basename) + return NULL; + + name = xstrdup(basename); + + while (i < MAX_REMOTES_WITH_SIMILAR_NAMES) { + char *url_key = xstrfmt("remote.%s.url", name); + const char *existing_url; + int exists = !repo_config_get_string_tmp(repo, url_key, &existing_url); + + free(url_key); + + if (!exists) + break; /* Free to use */ + + if (!strcmp(existing_url, remote_url)) { + reuse = true; + break; /* Same URL, so safe to reuse */ + } + + i++; + free(name); + name = xstrfmt("%s-%d", basename, i); + } + + if (i < MAX_REMOTES_WITH_SIMILAR_NAMES) { + configure_auto_promisor_remote(repo, name, + remote_url, remote_name, + reuse); + } else { + warning(_("too many remotes accepted with name like '%s-X', " + "ignoring this remote"), basename); + FREE_AND_NULL(name); + } + + free(basename); + return name; +} + +static int should_accept_new_remote_url(struct repository *repo, + struct string_list *accept_urls, + struct promisor_info *advertised) +{ + struct allowed_url *allowed = url_matches_accept_list(accept_urls, + advertised->url); + if (allowed) { + char *name = handle_matching_allowed_url(repo, + allowed->remote_name, + advertised->url, + advertised->name); + if (name) { + free((char *)advertised->local_name); + advertised->local_name = name; + return 1; + } + } + + return 0; +} + +static int should_accept_remote(struct repository *repo, + enum accept_promisor accept, struct promisor_info *advertised, - struct string_list *config_info) + struct string_list *accept_urls, + struct string_list *config_info, + bool *reload_config) { struct promisor_info *p; struct string_list_item *item; @@ -664,23 +1015,34 @@ static int should_accept_remote(enum accept_promisor accept, "this remote should have been rejected earlier", remote_name); - if (accept == ACCEPT_ALL) - return all_fields_match(advertised, config_info, NULL); - /* Get config info for that promisor remote */ item = string_list_lookup(config_info, remote_name); - if (!item) + if (!item) { /* We don't know about that remote */ + + int res = should_accept_new_remote_url(repo, accept_urls, advertised); + if (res) { + *reload_config = true; + return res; + } + + if (accept == ACCEPT_ALL) + return all_fields_match(advertised, config_info, NULL); return 0; + } p = item->util; - if (accept == ACCEPT_KNOWN_NAME) + /* Known remote in the allowlist? */ + if (!strcmp(p->url, remote_url) && url_matches_accept_list(accept_urls, remote_url)) return all_fields_match(advertised, config_info, p); - if (accept != ACCEPT_KNOWN_URL) - BUG("Unhandled 'enum accept_promisor' value '%d'", accept); + if (accept == ACCEPT_ALL) + return all_fields_match(advertised, config_info, NULL); + + if (accept == ACCEPT_KNOWN_NAME) + return all_fields_match(advertised, config_info, p); if (strcmp(p->url, remote_url)) { warning(_("known remote named '%s' but with URL '%s' instead of '%s', " @@ -689,7 +1051,13 @@ static int should_accept_remote(enum accept_promisor accept, return 0; } - return all_fields_match(advertised, config_info, p); + if (accept == ACCEPT_KNOWN_URL) + return all_fields_match(advertised, config_info, p); + + if (accept != ACCEPT_NONE) + BUG("Unhandled 'enum accept_promisor' value '%d'", accept); + + return 0; } static int skip_field_name_prefix(const char *elem, const char *field_name, const char **value) @@ -829,7 +1197,7 @@ static bool promisor_store_advertised_fields(struct promisor_info *advertised, { struct promisor_info *p; struct string_list_item *item; - const char *remote_name = advertised->name; + const char *remote_name = promisor_info_local_name(advertised); bool reload_config = false; if (!(store_info->store_filter || store_info->store_token)) @@ -894,8 +1262,12 @@ static void filter_promisor_remote(struct repository *repo, struct string_list_item *item; bool reload_config = false; enum accept_promisor accept = accept_from_server(repo); + struct string_list accept_urls = STRING_LIST_INIT_DUP; - if (accept == ACCEPT_NONE) + /* Load and validate the acceptFromServerUrl config */ + load_accept_from_server_url(repo, &accept_urls); + + if (accept == ACCEPT_NONE && !accept_urls.nr) return; /* Parse remote info received */ @@ -915,7 +1287,8 @@ static void filter_promisor_remote(struct repository *repo, string_list_sort(&config_info); } - if (should_accept_remote(accept, advertised, &config_info)) { + if (should_accept_remote(repo, accept, advertised, &accept_urls, + &config_info, &reload_config)) { if (!store_info) store_info = store_info_new(repo); if (promisor_store_advertised_fields(advertised, store_info)) @@ -927,6 +1300,7 @@ static void filter_promisor_remote(struct repository *repo, } } + string_list_clear_func(&accept_urls, allowed_url_free); promisor_info_list_clear(&config_info); string_list_clear(&remote_info, 0); store_info_free(store_info); @@ -937,7 +1311,8 @@ static void filter_promisor_remote(struct repository *repo, /* Apply accepted remotes to the stable repo state */ for_each_string_list_item(item, accepted_remotes) { struct promisor_info *info = item->util; - struct promisor_remote *r = repo_promisor_remote_find(repo, info->name); + const char *remote_name = promisor_info_local_name(info); + struct promisor_remote *r = repo_promisor_remote_find(repo, remote_name); if (r) { r->accepted = 1; diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index b404ad9f0a..549acff23f 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -177,8 +177,7 @@ test_expect_success "init + fetch with promisor.advertise set to 'true'" ' git -C server config promisor.advertise true && test_when_finished "rm -rf client" && - mkdir client && - git -C client init && + git init client && git -C client config remote.lop.promisor true && git -C client config remote.lop.fetch "+refs/heads/*:refs/remotes/lop/*" && git -C client config remote.lop.url "$TRASH_DIRECTORY_URL/lop" && @@ -231,8 +230,7 @@ test_expect_success "init + fetch two promisors but only one advertised" ' # Create a promisor that will be configured but not be used git init --bare unused_lop && - mkdir client && - git -C client init && + git init client && git -C client config remote.unused_lop.promisor true && git -C client config remote.unused_lop.fetch "+refs/heads/*:refs/remotes/unused_lop/*" && git -C client config remote.unused_lop.url "$TRASH_DIRECTORY_URL/unused_lop" && @@ -389,6 +387,202 @@ test_expect_success "clone with 'KnownUrl' and empty url, so not advertised" ' check_missing_objects server 1 "$oid" ' +test_expect_success "clone with 'None' but URL allowlisted" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with 'None' but URL not in allowlist" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="https://example.com/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is not missing on the server + check_missing_objects server 0 "" && + + # Reinitialize server so that the largest object is missing again + initialize_server 1 "$oid" +' + +test_expect_success "clone with 'None' but URL allowlisted in one pattern out of two" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="https://example.com/*" \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with 'None', URL allowlisted, but client has different URL" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + # The client configures "lop" with a different URL (serverTwo) than + # what the server advertises (lop). Even though the advertised URL + # matches the allowlist, the remote is rejected because the + # configured URL does not match the advertised one. + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/serverTwo" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is not missing on the server + check_missing_objects server 0 "" && + + # Reinitialize server so that the largest object is missing again + initialize_server 1 "$oid" +' + +test_expect_success "clone with URL allowlisted and no remote already configured" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + test_when_finished "rm -f full_names" && + + GIT_NO_LAZY_FETCH=0 git clone \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that exactly one remote has been auto-created, identified + # by "remote..advertisedAs" == "lop". + git -C client config get --all --show-names --regexp \ + "remote\..*\.advertisedas" >full_names && + test_line_count = 1 full_names && + REMOTE_NAME=$(sed "s/^remote\.\(.*\)\.advertisedas .*$/\1/" full_names) && + + # Check ".url" and ".promisor" values + printf "%s\n" "$TRASH_DIRECTORY_URL/lop" "true" >expect && + git -C client config "remote.$REMOTE_NAME.url" >actual && + git -C client config "remote.$REMOTE_NAME.promisor" >>actual && + test_cmp expect actual && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with named URL allowlisted and no pre-configured remote" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="cdn=$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that a remote has been auto-created with the right "cdn" name and fields. + printf "%s\n" "$TRASH_DIRECTORY_URL/lop" "true" "lop" >expect && + git -C client config "remote.cdn.url" >actual && + git -C client config "remote.cdn.promisor" >>actual && + git -C client config "remote.cdn.advertisedAs" >>actual && + test_cmp expect actual && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with URL allowlisted but colliding name" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.cdn.promisor=true \ + -c remote.cdn.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.cdn.url="https://example.com/cdn" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="cdn=$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that a remote has been auto-created with the right "cdn-1" name and fields. + printf "%s\n" "$TRASH_DIRECTORY_URL/lop" "true" "lop" >expect && + git -C client config "remote.cdn-1.url" >actual && + git -C client config "remote.cdn-1.promisor" >>actual && + git -C client config "remote.cdn-1.advertisedAs" >>actual && + test_cmp expect actual && + + # Check that the original "cdn" remote was not overwritten. + printf "%s\n" "https://example.com/cdn" "true" >expect && + git -C client config "remote.cdn.url" >actual && + git -C client config "remote.cdn.promisor" >>actual && + test_cmp expect actual && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with URL allowlisted and reusable remote" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone \ + -c remote.cdn.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.cdn.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="cdn=$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the existing "cdn" remote has been properly updated. + printf "%s\n" "$TRASH_DIRECTORY_URL/lop" "true" "lop" "+refs/heads/*:refs/remotes/lop/*" >expect && + git -C client config "remote.cdn.url" >actual && + git -C client config "remote.cdn.promisor" >>actual && + git -C client config "remote.cdn.advertisedAs" >>actual && + git -C client config "remote.cdn.fetch" >>actual && + test_cmp expect actual && + + # Check that no new "cdn-1" remote has been created. + test_must_fail git -C client config "remote.cdn-1.url" && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with invalid promisor.acceptFromServerUrl" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + # As "bad name" contains a space, which is not a valid remote name, + # the pattern should be rejected with a warning and no remote created. + GIT_NO_LAZY_FETCH=0 git clone \ + -c promisor.acceptfromserver=None \ + -c "promisor.acceptFromServerUrl=bad name=https://example.com/*" \ + --no-local --filter="blob:limit=5k" server client 2>err && + + # Check that a warning was emitted + test_grep "invalid remote name '\''bad name'\''" err && + + # Check that no remote was auto-created + test_must_fail git -C client config get --regexp "remote\..*\.advertisedas" && + + # Check that the largest object is not missing on the server + check_missing_objects server 0 "" && + + # Reinitialize server so that the largest object is missing again + initialize_server 1 "$oid" +' + test_expect_success "clone with promisor.sendFields" ' git -C server config promisor.advertise true && test_when_finished "rm -rf client" && diff --git a/urlmatch.c b/urlmatch.c index bf8cce6de9..20bc2d009c 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -112,7 +112,7 @@ static int match_host(const struct url_info *url_info, return (!url_len && !pat_len); } -static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs) +static char *url_normalize_1(const char *url, struct url_info *out_info, bool allow_globs) { /* * Normalize NUL-terminated url using the following rules: @@ -438,7 +438,12 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al char *url_normalize(const char *url, struct url_info *out_info) { - return url_normalize_1(url, out_info, 0); + return url_normalize_1(url, out_info, false); +} + +char *url_normalize_pattern(const char *url, struct url_info *out_info) +{ + return url_normalize_1(url, out_info, true); } char *url_parse(const char *url_orig, struct url_info *out_info) @@ -704,7 +709,7 @@ int urlmatch_config_entry(const char *var, const char *value, struct url_info norm_info; config_url = xmemdupz(key, dot - key); - norm_url = url_normalize_1(config_url, &norm_info, 1); + norm_url = url_normalize_1(config_url, &norm_info, true); if (norm_url) retval = match_urls(url, &norm_info, &matched); else if (collect->fallback_match_fn) diff --git a/urlmatch.h b/urlmatch.h index 6b3ce42858..db1a335e72 100644 --- a/urlmatch.h +++ b/urlmatch.h @@ -37,6 +37,18 @@ struct url_info { char *url_normalize(const char *, struct url_info *); char *url_parse(const char *, struct url_info *); +/* + * Like url_normalize(), but also allows '*' glob characters in the host + * portion. Use this when normalizing URL patterns from user configuration. + * + * Note that '*' is a valid path character per RFC 3986 (as a sub-delim), + * so glob patterns using '*' in the path are also accepted. + * + * Returns a newly allocated normalized string and fills out_info if + * non-NULL, or NULL if the pattern is invalid. + */ +char *url_normalize_pattern(const char *url, struct url_info *out_info); + struct urlmatch_item { size_t hostmatch_len; size_t pathmatch_len;