diff --git a/Documentation/config/promisor.adoc b/Documentation/config/promisor.adoc index b0fa43b839..605473c82f 100644 --- a/Documentation/config/promisor.adoc +++ b/Documentation/config/promisor.adoc @@ -51,6 +51,82 @@ promisor.acceptFromServer:: to "fetch" and "clone" requests from the client. Name and URL comparisons are case sensitive. See linkgit:gitprotocol-v2[5]. +promisor.acceptFromServerUrl:: + A glob pattern to specify which server-advertised URLs a + client is allowed to act on. When a URL matches, the client + will accept the advertised remote as a promisor remote and may + automatically accept field updates (such as authentication + tokens) from the server, even if `promisor.acceptFromServer` + is set to `none` (the default). ++ +This option can appear multiple times in config files. An advertised +URL will be accepted if it matches _ANY_ glob pattern specified by +this option in _ANY_ config file read by Git. ++ +When both `promisor.acceptFromServer` and `promisor.acceptFromServerUrl` +are set, `promisor.acceptFromServerUrl` is consulted first and takes +precedence: if a matching pattern leads to acceptance (by accepting +field updates for a known remote whose URL matches both the local +configuration and the allowlist), the advertised remote is accepted +regardless of the `promisor.acceptFromServer` setting. If no pattern +in `promisor.acceptFromServerUrl` triggers acceptance, the decision +is left to `promisor.acceptFromServer`. ++ +Note however that, even when an advertised URL matches a pattern in +`promisor.acceptFromServerUrl`, an already-existing remote on the +client whose name matches the advertised name but whose configured URL +differs from the advertised one will _NOT_ be accepted through +`promisor.acceptFromServerUrl`. This prevents a server from silently +re-pointing an existing client-side remote at a different URL. (Such a +remote may still be accepted through `promisor.acceptFromServer=all` +or `=knownName`, which have their own, looser semantics; see the +documentation of that option.) ++ +Be _VERY_ careful with these patterns: `*` matches any sequence of +characters within the 'host' and 'path' parts of a URL (but cannot +cross part boundaries). An overly broad pattern is a major security +risk, as a matching URL allows a server to update fields (such as +authentication tokens) on known remotes without further confirmation. +To minimize security risks, follow these guidelines: ++ +-- +1. Start with a secure protocol scheme, like `https://` or `ssh://`. ++ +2. Only allow domain names or paths where you control and trust _ALL_ + the content. Be especially careful with shared hosting platforms + like `github.com` or `gitlab.com`. A broad pattern like + `https://gitlab.com/*` is dangerous because it trusts every + repository on the entire platform. Always restrict such patterns to + your specific organization or namespace (e.g., + `https://gitlab.com/your-org/*`). ++ +3. Never use globs at the end of domain names. For example, + `https://cdn.your-org.com/*` might be safe, but + `https://cdn.your-org.com*/*` is a major security risk because + the latter matches `https://cdn.your-org.com.hacker.net/repo`. ++ +4. Be careful using globs at the beginning of domain names. While the + code ensures a `*` in the host cannot cross into the path, a + pattern like `https://*.example.com/*` will still match any + subdomain. This is extremely dangerous on shared hosting platforms + (e.g., `https://*.github.io/*` trusts every user's site on the + entire platform). +-- ++ +Before matching, both the advertised URL and the pattern are +normalized: the scheme and host are lowercased, percent-encoded +characters are decoded where possible, and path segments like `..` +are resolved. The port must also match exactly (e.g., +`https://example.com:8080/*` will not match a URL advertised on +port 9999). The username and password components of the URL are +ignored during matching. Note that embedding credentials in URLs is +discouraged. Passing authentication tokens via the `token` field of +the `promisor-remote` capability is strongly preferred. ++ +For the security implications of accepting a promisor remote, see the +documentation of `promisor.acceptFromServer`. For details on the +protocol, see linkgit:gitprotocol-v2[5]. + promisor.checkFields:: A comma or space separated list of additional remote related field names. A client checks if the values of these fields diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index befa697d21..2beb70595f 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -866,10 +866,11 @@ the server advertised, the client shouldn't advertise the On the server side, the "promisor.advertise" and "promisor.sendFields" configuration options can be used to control what it advertises. On -the client side, the "promisor.acceptFromServer" configuration option -can be used to control what it accepts, and the "promisor.storeFields" -option, to control what it stores. See the documentation of these -configuration options in linkgit:git-config[1] for more information. +the client side, the "promisor.acceptFromServer" and +"promisor.acceptFromServerUrl" configuration options can be used to +control what it accepts, and the "promisor.storeFields" option, to +control what it stores. See the documentation of these configuration +options in linkgit:git-config[1] for more information. Note that in the future it would be nice if the "promisor-remote" protocol capability could be used by the server, when responding to diff --git a/promisor-remote.c b/promisor-remote.c index 8d4f6e0a72..04a5bb9939 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -14,6 +14,7 @@ #include "url.h" #include "urlmatch.h" #include "version.h" +#include "wildmatch.h" struct promisor_remote_config { struct promisor_remote *promisors; @@ -742,8 +743,79 @@ static void load_accept_from_server_url(struct repository *repo, } } +static bool match_pattern_url(const char *pat, size_t pat_len, + const char *url, size_t url_len) +{ + char *p_str = xstrndup(pat, pat_len); + char *u_str = xstrndup(url, url_len); + bool res = !wildmatch(p_str, u_str, 0); + + free(p_str); + free(u_str); + + return res; +} + +static bool match_one_url(const struct url_info *pi, const struct url_info *ui) +{ + const char *pat = pi->url; + const char *url = ui->url; + + /* + * Schemes must match exactly. They are case-folded by + * url_normalize(), so strncmp() suffices. + */ + if (pi->scheme_len != ui->scheme_len || strncmp(pat, url, pi->scheme_len)) + return false; + + /* + * Ports must match exactly. url_normalize() strips default + * ports (like 443 for https), so length and content + * comparisons are sufficient. + */ + if (pi->port_len != ui->port_len || + strncmp(pat + pi->port_off, url + ui->port_off, pi->port_len)) + return false; + + /* + * Match host and path separately to prevent a '*' in the host + * portion of the pattern from matching across the '/' + * boundary into the path. + */ + + return match_pattern_url(pat + pi->host_off, pi->host_len, + url + ui->host_off, ui->host_len) && + match_pattern_url(pat + pi->path_off, pi->path_len, + url + ui->path_off, ui->path_len); +} + +static struct allowed_url *url_matches_accept_list( + struct string_list *accept_urls, const char *url) +{ + struct string_list_item *item; + struct url_info url_info; + + url_info.url = url_normalize(url, &url_info); + + if (!url_info.url) + return NULL; + + for_each_string_list_item(item, accept_urls) { + struct allowed_url *allowed = item->util; + + if (match_one_url(&allowed->pattern_info, &url_info)) { + free(url_info.url); + return allowed; + } + } + + free(url_info.url); + return NULL; +} + static int should_accept_remote(enum accept_promisor accept, struct promisor_info *advertised, + struct string_list *accept_urls, struct string_list *config_info) { struct promisor_info *p; @@ -756,23 +828,27 @@ static int should_accept_remote(enum accept_promisor accept, "this remote should have been rejected earlier", remote_name); - if (accept == ACCEPT_ALL) - return all_fields_match(advertised, config_info, NULL); - /* Get config info for that promisor remote */ item = string_list_lookup(config_info, remote_name); - if (!item) + if (!item) { /* We don't know about that remote */ + if (accept == ACCEPT_ALL) + return all_fields_match(advertised, config_info, NULL); return 0; + } p = item->util; - if (accept == ACCEPT_KNOWN_NAME) + /* Known remote in the allowlist? */ + if (!strcmp(p->url, remote_url) && url_matches_accept_list(accept_urls, remote_url)) return all_fields_match(advertised, config_info, p); - if (accept != ACCEPT_KNOWN_URL) - BUG("Unhandled 'enum accept_promisor' value '%d'", accept); + if (accept == ACCEPT_ALL) + return all_fields_match(advertised, config_info, NULL); + + if (accept == ACCEPT_KNOWN_NAME) + return all_fields_match(advertised, config_info, p); if (strcmp(p->url, remote_url)) { warning(_("known remote named '%s' but with URL '%s' instead of '%s', " @@ -781,7 +857,13 @@ static int should_accept_remote(enum accept_promisor accept, return 0; } - return all_fields_match(advertised, config_info, p); + if (accept == ACCEPT_KNOWN_URL) + return all_fields_match(advertised, config_info, p); + + if (accept != ACCEPT_NONE) + BUG("Unhandled 'enum accept_promisor' value '%d'", accept); + + return 0; } static int skip_field_name_prefix(const char *elem, const char *field_name, const char **value) @@ -991,7 +1073,7 @@ static void filter_promisor_remote(struct repository *repo, /* Load and validate the acceptFromServerUrl config */ load_accept_from_server_url(repo, &accept_urls); - if (accept == ACCEPT_NONE) + if (accept == ACCEPT_NONE && !accept_urls.nr) return; /* Parse remote info received */ @@ -1011,7 +1093,7 @@ static void filter_promisor_remote(struct repository *repo, string_list_sort(&config_info); } - if (should_accept_remote(accept, advertised, &config_info)) { + if (should_accept_remote(accept, advertised, &accept_urls, &config_info)) { if (!store_info) store_info = store_info_new(repo); if (promisor_store_advertised_fields(advertised, store_info)) diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index 3b39505380..0659b2ac15 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -387,6 +387,77 @@ test_expect_success "clone with 'KnownUrl' and empty url, so not advertised" ' check_missing_objects server 1 "$oid" ' +test_expect_success "clone with 'None' but URL allowlisted" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with 'None' but URL not in allowlist" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="https://example.com/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is not missing on the server + check_missing_objects server 0 "" && + + # Reinitialize server so that the largest object is missing again + initialize_server 1 "$oid" +' + +test_expect_success "clone with 'None' but URL allowlisted in one pattern out of two" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="https://example.com/*" \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "clone with 'None', URL allowlisted, but client has different URL" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + # The client configures "lop" with a different URL (serverTwo) than + # what the server advertises (lop). Even though the advertised URL + # matches the allowlist, the remote is rejected because the + # configured URL does not match the advertised one. + GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/serverTwo" \ + -c promisor.acceptfromserver=None \ + -c promisor.acceptFromServerUrl="$ENCODED_TRASH_DIRECTORY_URL/*" \ + --no-local --filter="blob:limit=5k" server client && + + # Check that the largest object is not missing on the server + check_missing_objects server 0 "" && + + # Reinitialize server so that the largest object is missing again + initialize_server 1 "$oid" +' + test_expect_success "clone with invalid promisor.acceptFromServerUrl" ' git -C server config promisor.advertise true && test_when_finished "rm -rf client" &&