From b4e8f60a3c78477e1f28b052cd740ac4a43741d5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 22 Jan 2026 16:05:58 +0000 Subject: [PATCH 01/24] revision: add --maximal-only option When inspecting a range of commits from some set of starting references, it is sometimes useful to learn which commits are not reachable from any other commits in the selected range. One such application is in the creation of a sequence of bundles for the bundle URI feature. Creating a stack of bundles representing different slices of time includes defining which references to include. If all references are used, then this may be overwhelming or redundant. Instead, selecting commits that are maximal to the range could help defining a smaller reference set to use in the bundle header. Add a new '--maximal-only' option to restrict the output of a revision range to be only the commits that are not reachable from any other commit in the range, based on the reachability definition of the walk. This is accomplished by adding a new 28th bit flag, CHILD_VISITED, that is set as we walk. This does extend the bit range in object.h, but using an earlier bit may collide with another feature. The tests demonstrate the behavior of the feature with a positive-only range, ranges with negative references, and walk-modifying flags like --first-parent and --exclude-first-parent-only. Since the --boundary option would not increase any results when used with the --maximal-only option, mark them as incompatible. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.adoc | 4 ++ object.h | 4 +- revision.c | 12 ++++- revision.h | 5 +- t/t6000-rev-list-misc.sh | 15 ++++++ t/t6600-test-reach.sh | 75 +++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+), 5 deletions(-) diff --git a/Documentation/rev-list-options.adoc b/Documentation/rev-list-options.adoc index 453ec590571ffc..a39cf88bbcfaaa 100644 --- a/Documentation/rev-list-options.adoc +++ b/Documentation/rev-list-options.adoc @@ -148,6 +148,10 @@ endif::git-log[] from the point where it diverged from the remote branch, given that arbitrary merges can be valid topic branch changes. +`--maximal-only`:: + Restrict the output commits to be those that are not reachable + from any other commits in the revision range. + `--not`:: Reverses the meaning of the '{caret}' prefix (or lack thereof) for all following revision specifiers, up to the next `--not`. diff --git a/object.h b/object.h index 4bca957b8dcbd6..dfe7a1f0ea29da 100644 --- a/object.h +++ b/object.h @@ -64,7 +64,7 @@ void object_array_init(struct object_array *array); /* * object flag allocation: - * revision.h: 0---------10 15 23------27 + * revision.h: 0---------10 15 23--------28 * fetch-pack.c: 01 67 * negotiator/default.c: 2--5 * walker.c: 0-2 @@ -86,7 +86,7 @@ void object_array_init(struct object_array *array); * builtin/unpack-objects.c: 2021 * pack-bitmap.h: 2122 */ -#define FLAG_BITS 28 +#define FLAG_BITS 29 #define TYPE_BITS 3 diff --git a/revision.c b/revision.c index 9b131670f79b96..6ca4b9dfcd54b9 100644 --- a/revision.c +++ b/revision.c @@ -1150,7 +1150,8 @@ static int process_parents(struct rev_info *revs, struct commit *commit, struct commit *p = parent->item; parent = parent->next; if (p) - p->object.flags |= UNINTERESTING; + p->object.flags |= UNINTERESTING | + CHILD_VISITED; if (repo_parse_commit_gently(revs->repo, p, 1) < 0) continue; if (p->parents) @@ -1204,7 +1205,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit, if (!*slot) *slot = *revision_sources_at(revs->sources, commit); } - p->object.flags |= pass_flags; + p->object.flags |= pass_flags | CHILD_VISITED; if (!(p->object.flags & SEEN)) { p->object.flags |= (SEEN | NOT_USER_GIVEN); if (list) @@ -2377,6 +2378,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if ((argcount = parse_long_opt("until", argv, &optarg))) { revs->min_age = approxidate(optarg); return argcount; + } else if (!strcmp(arg, "--maximal-only")) { + revs->maximal_only = 1; } else if (!strcmp(arg, "--first-parent")) { revs->first_parent_only = 1; } else if (!strcmp(arg, "--exclude-first-parent-only")) { @@ -3147,6 +3150,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s !!revs->reverse, "--reverse", !!revs->reflog_info, "--walk-reflogs"); + die_for_incompatible_opt2(!!revs->boundary, "--boundary", + !!revs->maximal_only, "--maximal-only"); + if (revs->no_walk && revs->graph) die(_("options '%s' and '%s' cannot be used together"), "--no-walk", "--graph"); if (!revs->reflog_info && revs->grep_filter.use_reflog_filter) @@ -4125,6 +4131,8 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi { if (commit->object.flags & SHOWN) return commit_ignore; + if (revs->maximal_only && (commit->object.flags & CHILD_VISITED)) + return commit_ignore; if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid)) return commit_ignore; if (revs->no_kept_objects) { diff --git a/revision.h b/revision.h index b36acfc2d9f61d..69242ecb189a52 100644 --- a/revision.h +++ b/revision.h @@ -52,7 +52,9 @@ #define NOT_USER_GIVEN (1u<<25) #define TRACK_LINEAR (1u<<26) #define ANCESTRY_PATH (1u<<27) -#define ALL_REV_FLAGS (((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR | PULL_MERGE) +#define CHILD_VISITED (1u<<28) +#define ALL_REV_FLAGS (((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR \ + | PULL_MERGE | CHILD_VISITED) #define DECORATE_SHORT_REFS 1 #define DECORATE_FULL_REFS 2 @@ -189,6 +191,7 @@ struct rev_info { left_right:1, left_only:1, right_only:1, + maximal_only:1, rewrite_parents:1, print_parents:1, show_decorations:1, diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh index fec16448cfddb8..d0a2a866100d56 100755 --- a/t/t6000-rev-list-misc.sh +++ b/t/t6000-rev-list-misc.sh @@ -248,4 +248,19 @@ test_expect_success 'rev-list -z --boundary' ' test_cmp expect actual ' +test_expect_success 'rev-list --boundary incompatible with --maximal-only' ' + test_when_finished rm -rf repo && + + git init repo && + test_commit -C repo 1 && + test_commit -C repo 2 && + + oid1=$(git -C repo rev-parse HEAD~) && + oid2=$(git -C repo rev-parse HEAD) && + + test_must_fail git -C repo rev-list --boundary --maximal-only \ + HEAD~1..HEAD 2>err && + test_grep "cannot be used together" err +' + test_done diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 6638d1aa1dcebe..2613075894282d 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -762,4 +762,79 @@ test_expect_success 'for-each-ref is-base: --sort' ' --sort=refname --sort=-is-base:commit-2-3 ' +test_expect_success 'rev-list --maximal-only (all positive)' ' + # Only one maximal. + cat >input <<-\EOF && + refs/heads/commit-1-1 + refs/heads/commit-4-2 + refs/heads/commit-4-4 + refs/heads/commit-8-4 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-8-4) + EOF + run_all_modes git rev-list --maximal-only --stdin && + + # All maximal. + cat >input <<-\EOF && + refs/heads/commit-5-2 + refs/heads/commit-4-3 + refs/heads/commit-3-4 + refs/heads/commit-2-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-5-2) + $(git rev-parse refs/heads/commit-4-3) + $(git rev-parse refs/heads/commit-3-4) + $(git rev-parse refs/heads/commit-2-5) + EOF + run_all_modes git rev-list --maximal-only --stdin && + + # Mix of both. + cat >input <<-\EOF && + refs/heads/commit-5-2 + refs/heads/commit-3-2 + refs/heads/commit-2-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-5-2) + $(git rev-parse refs/heads/commit-2-5) + EOF + run_all_modes git rev-list --maximal-only --stdin +' + +test_expect_success 'rev-list --maximal-only (range)' ' + cat >input <<-\EOF && + refs/heads/commit-1-1 + refs/heads/commit-2-5 + refs/heads/commit-6-4 + ^refs/heads/commit-4-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-6-4) + EOF + run_all_modes git rev-list --maximal-only --stdin && + + # first-parent changes reachability: the first parent + # reduces the second coordinate to 1 before reducing the + # first coordinate. + cat >input <<-\EOF && + refs/heads/commit-1-1 + refs/heads/commit-2-5 + refs/heads/commit-6-4 + ^refs/heads/commit-4-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-6-4) + $(git rev-parse refs/heads/commit-2-5) + EOF + run_all_modes git rev-list --maximal-only --stdin \ + --first-parent --exclude-first-parent-only +' + test_done From bfc1b34859c973127cdef0cfba537c25ee39f763 Mon Sep 17 00:00:00 2001 From: "D. Ben Knoble" Date: Sat, 7 Feb 2026 16:59:16 -0500 Subject: [PATCH 02/24] completion: add stash import, export These newer commands lack completion; implement basic support for options and arguments. Signed-off-by: D. Ben Knoble Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 538dff1ee5cebe..a8e7c6ddbfb2b1 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -3465,7 +3465,7 @@ _git_sparse_checkout () _git_stash () { - local subcommands='push list show apply clear drop pop create branch' + local subcommands='push list show apply clear drop pop create branch import export' local subcommand="$(__git_find_on_cmdline "$subcommands save")" if [ -z "$subcommand" ]; then @@ -3491,6 +3491,9 @@ _git_stash () show,--*) __gitcomp_builtin stash_show "$__git_diff_common_options" ;; + export,--*) + __gitcomp_builtin stash_export "--print --to-ref" + ;; *,--*) __gitcomp_builtin "stash_$subcommand" ;; @@ -3502,7 +3505,10 @@ _git_stash () | sed -n -e 's/:.*//p')" fi ;; - show,*|apply,*|drop,*|pop,*) + import,*) + __git_complete_refs + ;; + show,*|apply,*|drop,*|pop,*|export,*) __gitcomp_nl "$(__git stash list \ | sed -n -e 's/:.*//p')" ;; From f4eff7116dabac7e4f5a6aad8a74feaebdcb8342 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 11 Feb 2026 13:44:59 +0100 Subject: [PATCH 03/24] builtin/pack-objects: don't fetch objects when merging packs The "--stdin-packs" option can be used to merge objects from multiple packfiles given via stdin into a new packfile. One big upside of this option is that we don't have to perform a complete rev walk to enumerate objects. Instead, we can simply enumerate all objects that are part of the specified packfiles, which can be significantly faster in very large repositories. There is one downside though: when we don't perform a rev walk we also don't have a good way to learn about the respective object's names. As a consequence, we cannot use the name hashes as a heuristic to get better delta selection. We try to offset this downside though by performing a localized rev walk: we queue all objects that we're about to repack as interesting, and all objects from excluded packfiles as uninteresting. We then perform a best-effort rev walk that allows us to fill in object names. There is one gotcha here though: when "--exclude-promisor-objects" has not been given we will perform backfill fetches for any promised objects that are missing. This used to not be an issue though as this option was mutually exclusive with "--stdin-packs". But that has changed recently, and starting with dcc9c7ef47 (builtin/repack: handle promisor packs with geometric repacking, 2026-01-05) we will now repack promisor packs during geometric compaction. The consequence is that a geometric repack may now perform a bunch of backfill fetches. We of course cannot pass "--exclude-promisor-objects" to fix this issue -- after all, the whole intent is to repack objects part of a promisor pack. But arguably we don't have to: the rev walk is intended as best effort, and we already configure it to ignore missing links to other objects. So we can adapt the walk to unconditionally disable fetching any missing objects. Do so and add a test that verifies we don't backfill any objects. Reported-by: Lukas Wanko Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 10 ++++++++++ t/t5331-pack-objects-stdin.sh | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5846b6a293a27b..0a4a358096fed4 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3927,8 +3927,16 @@ static void add_unreachable_loose_objects(struct rev_info *revs); static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) { + int prev_fetch_if_missing = fetch_if_missing; struct rev_info revs; + /* + * The revision walk may hit objects that are promised, only. As the + * walk is best-effort though we don't want to perform backfill fetches + * for them. + */ + fetch_if_missing = 0; + repo_init_revisions(the_repository, &revs, NULL); /* * Use a revision walk to fill in the namehash of objects in the include @@ -3964,6 +3972,8 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", stdin_packs_hints_nr); + + fetch_if_missing = prev_fetch_if_missing; } static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index cd949025b982a4..c3bbc76b0de7a0 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -358,6 +358,24 @@ test_expect_success '--stdin-packs with promisors' ' ) ' +test_expect_success '--stdin-packs does not perform backfill fetch' ' + test_when_finished "rm -rf remote client" && + + git init remote && + test_commit_bulk -C remote 10 && + git -C remote config set --local uploadpack.allowfilter 1 && + git -C remote config set --local uploadpack.allowanysha1inwant 1 && + + git clone --filter=tree:0 "file://$(pwd)/remote" client && + ( + cd client && + ls .git/objects/pack/*.promisor | sed "s|.*/||; s/\.promisor$/.pack/" >packs && + test_line_count -gt 1 packs && + GIT_TRACE2_EVENT="$(pwd)/event.log" git pack-objects --stdin-packs pack Date: Thu, 12 Feb 2026 23:28:23 +0100 Subject: [PATCH 04/24] doc: add caveat about round-tripping format-patch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-format-patch(1) and git-am(1) deal with formatting commits as patches and applying them, respectively. Naturally they use a few delimiters to mark where the commit message ends. This can lead to surprising behavior when these delimiters are used in the commit message itself. git-format-patch(1) will accept any commit message and not warn or error about these delimiters being used.[1] Especially problematic is the presence of unindented diffs in the commit message; the patch machinery will naturally (since the commit message has ended) try to apply that diff and everything after it.[2] It is unclear whether any commands in this chain will learn to warn about this. One concern could be that users have learned to rely on the three-dash line rule to conveniently add extra-commit message information in the commit message, knowing that git-am(1) will ignore it.[4] All of this is covered already, technically. However, we should spell out the implications. † 1: There is also git-commit(1) to consider. However, making that command warn or error out over such delimiters would be disruptive to all Git users who never use email in their workflow. † 2: Recently patch(1) caused this issue for a project, but it was noted that git-am(1) has the same behavior[3] † 3: https://github.com/i3/i3/pull/6564#issuecomment-3858381425 † 4: https://lore.kernel.org/git/xmqqldh4b5y2.fsf@gitster.g/ https://lore.kernel.org/git/V3_format-patch_caveats.354@msgid.xyz/ Reported-by: Matthias Beyer Reported-by: Christoph Anton Mitterer Reported-by: Matheus Tavares Reported-by: Chris Packham Helped-by: Jakob Haufe Helped-by: Phillip Wood Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/format-patch-caveats.adoc | 33 +++++++++++++++++++ .../format-patch-end-of-commit-message.adoc | 8 +++++ Documentation/git-am.adoc | 19 +++++++---- Documentation/git-format-patch.adoc | 4 +++ Documentation/git-send-email.adoc | 5 +++ 5 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 Documentation/format-patch-caveats.adoc create mode 100644 Documentation/format-patch-end-of-commit-message.adoc diff --git a/Documentation/format-patch-caveats.adoc b/Documentation/format-patch-caveats.adoc new file mode 100644 index 00000000000000..807a65b885b09a --- /dev/null +++ b/Documentation/format-patch-caveats.adoc @@ -0,0 +1,33 @@ +The output from linkgit:git-format-patch[1] can lead to a different +commit message when applied with linkgit:git-am[1]. The patch that is +applied may also be different from the one that was generated, or patch +application may fail outright. +ifdef::git-am[] +See the <> section above for the syntactic rules. +endif::git-am[] + +ifndef::git-am[] +include::format-patch-end-of-commit-message.adoc[] +endif::git-am[] + +Note that this is especially problematic for unindented diffs that occur +in the commit message; the diff in the commit message might get applied +along with the patch section, or the patch application machinery might +trip up because the patch target doesn't apply. This could for example +be caused by a diff in a Markdown code block. + +The solution for this is to indent the diff or other text that could +cause problems. + +This loss of fidelity might be simple to notice if you are applying +patches directly from a mailbox. However, changes originating from Git +could be applied in bulk, in which case this would be much harder to +notice. This could for example be a Linux distribution which uses patch +files to apply changes on top of the commits from the upstream +repositories. This goes to show that this behavior does not only impact +email workflows. + +Given these limitations, one might be tempted to use a general-purpose +utility like patch(1) instead. However, patch(1) will not only look for +unindented diffs (like linkgit:git-am[1]) but will try to apply indented +diffs as well. diff --git a/Documentation/format-patch-end-of-commit-message.adoc b/Documentation/format-patch-end-of-commit-message.adoc new file mode 100644 index 00000000000000..ec1ef79f5e3308 --- /dev/null +++ b/Documentation/format-patch-end-of-commit-message.adoc @@ -0,0 +1,8 @@ +Any line that is of the form: + +* three-dashes and end-of-line, or +* a line that begins with "diff -", or +* a line that begins with "Index: " + +is taken as the beginning of a patch, and the commit log message +is terminated before the first occurrence of such a line. diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc index 0c94776e296981..972398d4575c85 100644 --- a/Documentation/git-am.adoc +++ b/Documentation/git-am.adoc @@ -233,6 +233,7 @@ applying. create an empty commit with the contents of the e-mail message as its log message. +[[discussion]] DISCUSSION ---------- @@ -252,14 +253,11 @@ where the patch begins. Excess whitespace at the end of each line is automatically stripped. The patch is expected to be inline, directly following the -message. Any line that is of the form: +message. +include::format-patch-end-of-commit-message.adoc[] -* three-dashes and end-of-line, or -* a line that begins with "diff -", or -* a line that begins with "Index: " - -is taken as the beginning of a patch, and the commit log message -is terminated before the first occurrence of such a line. +This means that the contents of the commit message can inadvertently +interrupt the processing (see the <> section below). When initially invoking `git am`, you give it the names of the mailboxes to process. Upon seeing the first patch that does not apply, it @@ -283,6 +281,13 @@ commits, like running 'git am' on the wrong branch or an error in the commits that is more easily fixed by changing the mailbox (e.g. errors in the "From:" lines). +[[caveats]] +CAVEATS +------- + +:git-am: 1 +include::format-patch-caveats.adoc[] + HOOKS ----- This command can run `applypatch-msg`, `pre-applypatch`, diff --git a/Documentation/git-format-patch.adoc b/Documentation/git-format-patch.adoc index 9a7807ca71a528..bac9b818f3b418 100644 --- a/Documentation/git-format-patch.adoc +++ b/Documentation/git-format-patch.adoc @@ -798,6 +798,10 @@ if they are part of the requested range. A simple "patch" does not include enough information for the receiving end to reproduce the same merge commit. +=== PATCH APPLICATION + +include::format-patch-caveats.adoc[] + SEE ALSO -------- linkgit:git-am[1], linkgit:git-send-email[1] diff --git a/Documentation/git-send-email.adoc b/Documentation/git-send-email.adoc index ebe8853e9f5da7..0b118df6498b0d 100644 --- a/Documentation/git-send-email.adoc +++ b/Documentation/git-send-email.adoc @@ -692,6 +692,11 @@ Links of a few such community maintained helpers are: - https://github.com/AdityaGarg8/git-credential-email[git-msgraph] (cross platform client that can send emails using the Microsoft Graph API) +CAVEATS +------- + +include::format-patch-caveats.adoc[] + SEE ALSO -------- linkgit:git-format-patch[1], linkgit:git-imap-send[1], mbox(5) From 88fb80c4b21b29653eac4b3da6746bb9a35596f7 Mon Sep 17 00:00:00 2001 From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:07:28 +0530 Subject: [PATCH 05/24] sparse-checkout: use string_list_sort_u sparse_checkout_list() uses string_list_sort and string_list_remove_duplicates instead of string_list_sort_u. use string_list_sort_u at that place. Signed-off-by: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com> Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index cccf63033190d2..34e965bfa6f6c7 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -94,8 +94,7 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix, string_list_append(&sl, pe->pattern + 1); } - string_list_sort(&sl); - string_list_remove_duplicates(&sl, 0); + string_list_sort_u(&sl, 0); for (i = 0; i < sl.nr; i++) { quote_c_style(sl.items[i].string, NULL, stdout, 0); From 046e1117d5d7ccb493ff8857876b05ac16453d43 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 13 Feb 2026 14:34:48 +0000 Subject: [PATCH 06/24] templates: add .gitattributes entry for sample hooks The sample hooks are shell scripts but the filenames end with ".sample" so they need their own .gitattributes rule. Update our editorconfig settings to match the attributes as well. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- .editorconfig | 2 +- .gitattributes | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index 2d3929b5916a14..6e4eaa8e955341 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,7 +4,7 @@ insert_final_newline = true # The settings for C (*.c and *.h) files are mirrored in .clang-format. Keep # them in sync. -[{*.{c,h,sh,bash,perl,pl,pm,txt,adoc},config.mak.*,Makefile}] +[{*.{c,h,sh,bash,perl,pl,pm,txt,adoc},config.mak.*,Makefile,templates/hooks/*.sample}] indent_style = tab tab_width = 8 diff --git a/.gitattributes b/.gitattributes index 38b1c52fe0e230..556322be01b4a8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -18,3 +18,4 @@ CODE_OF_CONDUCT.md -whitespace /Documentation/user-manual.adoc conflict-marker-size=32 /t/t????-*.sh conflict-marker-size=32 /t/unit-tests/clar/test/expected/* whitespace=-blank-at-eof +/templates/hooks/*.sample whitespace=indent,trail,space,incomplete text eol=lf From 83804c361be1c1b8433b72b8e82f0ab6a476609d Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 13 Feb 2026 14:34:49 +0000 Subject: [PATCH 07/24] templates: detect commit messages containing diffs If the body of a commit message contains a diff that is not indented then "git am" will treat that diff as part of the patch rather than as part of the commit message. This allows it to apply email messages that were created by adding a commit message in front of a regular diff without adding the "---" separator used by "git format-patch". This often surprises users [1-4] so add a check to the sample "commit-msg" hook to reject messages that would confuse "git am". Even if a project does not use an email based workflow it is not uncommon for people to generate patches from it and apply them with "git am". Therefore it is still worth discouraging the creation of commit messages that would not be applied correctly. A further source of confusion when applying patches with "git am" is the "---" separator that is added by "git format patch". If a commit message body contains that line then it will be truncated by "git am". As this is often used by patch authors to add some commentary that they do not want to end up in the commit message when the patch is applied, the hook does not complain about the presence of "---" lines in the message. Detecting if the message contains a diff is complicated by the hook being passed the message before it is cleaned up so we need to ignore any diffs below the scissors line. There are also two possible config keys to check to find the comment character at the start of the scissors line. The first paragraph of the commit message becomes the email subject header which beings "Subject: " and so does not need to be checked. The trailing ".*" when matching commented lines ensures that if the comment string ends with a "$" it is not treated as an anchor. [1] https://lore.kernel.org/git/bcqvh7ahjjgzpgxwnr4kh3hfkksfruf54refyry3ha7qk7dldf@fij5calmscvm [2] https://lore.kernel.org/git/ca13705ae4817ffba16f97530637411b59c9eb19.camel@scientia.org/ [3] https://lore.kernel.org/git/d0b577825124ac684ab304d3a1395f3d2d0708e8.1662333027.git.matheus.bernardino@usp.br/ [4] https://lore.kernel.org/git/CAFOYHZC6Qd9wkoWPcTJDxAs9u=FGpHQTkjE-guhwkya0DRVA6g@mail.gmail.com/ Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- templates/hooks/commit-msg.sample | 54 +++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/templates/hooks/commit-msg.sample b/templates/hooks/commit-msg.sample index b58d1184a9d43a..f7458efe62f2c7 100755 --- a/templates/hooks/commit-msg.sample +++ b/templates/hooks/commit-msg.sample @@ -15,10 +15,60 @@ # SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') # grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" -# This example catches duplicate Signed-off-by lines. +# This example catches duplicate Signed-off-by lines and messages that +# would confuse 'git am'. + +ret=0 test "" = "$(grep '^Signed-off-by: ' "$1" | sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { echo >&2 Duplicate Signed-off-by lines. - exit 1 + ret=1 } + +comment_re="$( + { + git config --get-regexp "^core\.comment(char|string)\$" || + echo '#' + } | sed -n -e ' + ${ + s/^[^ ]* // + s|[][*./\]|\\&|g + s/^auto$/[#;@!$%^&|:]/ + p + }' +)" +scissors_line="^${comment_re} -\{8,\} >8 -\{8,\}\$" +comment_line="^${comment_re}.*" +blank_line='^[ ]*$' +# Disallow lines starting with "diff -" or "Index: " in the body of the +# message. Stop looking if we see a scissors line. +line="$(sed -n -e " + # Skip comments and blank lines at the start of the file. + /${scissors_line}/q + /${comment_line}/d + /${blank_line}/d + # The first paragraph will become the subject header so + # does not need to be checked. + : subject + n + /${scissors_line}/q + /${blank_line}/!b subject + # Check the body of the message for problematic + # prefixes. + : body + n + /${scissors_line}/q + /${comment_line}/b body + /^diff -/{p;q;} + /^Index: /{p;q;} + b body + " "$1")" +if test -n "$line" +then + echo >&2 "Message contains a diff that will confuse 'git am'." + echo >&2 "To fix this indent the diff." + ret=1 +fi + +exit $ret From fd104ef15deb16fae9ed48ad548f1d7a40e17222 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Fri, 13 Feb 2026 19:54:55 +0000 Subject: [PATCH 08/24] trace2: add macOS process ancestry tracing In 353d3d77f4 (trace2: collect Windows-specific process information, 2019-02-22) Windows-specific process ancestry information was added as a data_json event to TRACE2. Furthermore in 2f732bf15e (tr2: log parent process name, 2021-07-21) similar functionality was added for Linux-based systems, using procfs. Teach Git to also log process ancestry on macOS using the sysctl with KERN_PROC to get process information (PPID and process name). Like the Linux implementation, we use the cmd_ancestry TRACE2 event rather than using a data_json event and creating another custom data point. Signed-off-by: Matthew John Cheetham Signed-off-by: Junio C Hamano --- compat/darwin/procinfo.c | 97 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 compat/darwin/procinfo.c diff --git a/compat/darwin/procinfo.c b/compat/darwin/procinfo.c new file mode 100644 index 00000000000000..c8954f02d73787 --- /dev/null +++ b/compat/darwin/procinfo.c @@ -0,0 +1,97 @@ +#include "git-compat-util.h" +#include "strbuf.h" +#include "strvec.h" +#include "trace2.h" +#include + +/* + * An arbitrarily chosen value to limit the depth of the ancestor chain. + */ +#define NR_PIDS_LIMIT 10 + +/* + * Get the process name and parent PID for a given PID using sysctl(). + * Returns 0 on success, -1 on failure. + */ +static int get_proc_info(pid_t pid, struct strbuf *name, pid_t *ppid) +{ + int mib[4]; + struct kinfo_proc proc; + size_t size = sizeof(proc); + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = pid; + + if (sysctl(mib, 4, &proc, &size, NULL, 0) < 0) + return -1; + + if (size == 0) + return -1; + + strbuf_addstr(name, proc.kp_proc.p_comm); + *ppid = proc.kp_eproc.e_ppid; + + return 0; +} + +/* + * Recursively push process names onto the ancestry array. + * We guard against cycles by limiting the depth to NR_PIDS_LIMIT. + */ +static void push_ancestry_name(struct strvec *names, pid_t pid, int depth) +{ + struct strbuf name = STRBUF_INIT; + pid_t ppid; + + if (depth >= NR_PIDS_LIMIT) + return; + + if (pid <= 0) + return; + + if (get_proc_info(pid, &name, &ppid) < 0) + goto cleanup; + + strvec_push(names, name.buf); + + /* + * Recurse to the parent process. Stop if ppid not valid + * or if we've reached ourselves (cycle). + */ + if (ppid && ppid != pid) + push_ancestry_name(names, ppid, depth + 1); + +cleanup: + strbuf_release(&name); +} + +void trace2_collect_process_info(enum trace2_process_info_reason reason) +{ + struct strvec names = STRVEC_INIT; + + if (!trace2_is_enabled()) + return; + + switch (reason) { + case TRACE2_PROCESS_INFO_STARTUP: + push_ancestry_name(&names, getppid(), 0); + if (names.nr) + trace2_cmd_ancestry(names.v); + + strvec_clear(&names); + break; + + case TRACE2_PROCESS_INFO_EXIT: + /* + * The Windows version of this calls its + * get_peak_memory_info() here. We may want to insert + * similar process-end statistics here in the future. + */ + break; + + default: + BUG("trace2_collect_process_info: unknown reason '%d'", reason); + } +} From 088aaf1d416e5ea1f6986d82df9ba63d7bf1197f Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Fri, 13 Feb 2026 19:54:56 +0000 Subject: [PATCH 09/24] build: include procinfo.c impl for macOS Include an implementation of trace2_collect_process_info for macOS. Signed-off-by: Matthew John Cheetham Signed-off-by: Junio C Hamano --- config.mak.uname | 2 ++ contrib/buildsystems/CMakeLists.txt | 2 ++ meson.build | 2 ++ 3 files changed, 6 insertions(+) diff --git a/config.mak.uname b/config.mak.uname index 1691c6ae6e01e3..baa5018461c3db 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -148,6 +148,8 @@ ifeq ($(uname_S),Darwin) HAVE_NS_GET_EXECUTABLE_PATH = YesPlease CSPRNG_METHOD = arc4random USE_ENHANCED_BASIC_REGULAR_EXPRESSIONS = YesPlease + HAVE_PLATFORM_PROCINFO = YesPlease + COMPAT_OBJS += compat/darwin/procinfo.o # Workaround for `gettext` being keg-only and not even being linked via # `brew link --force gettext`, should be obsolete as of diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index edb0fc04ad7649..d489f0cadab4de 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -274,6 +274,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") add_compile_definitions(PROCFS_EXECUTABLE_PATH="/proc/self/exe" HAVE_DEV_TTY ) list(APPEND compat_SOURCES unix-socket.c unix-stream-server.c compat/linux/procinfo.c) +elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + list(APPEND compat_SOURCES compat/darwin/procinfo.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "Windows") diff --git a/meson.build b/meson.build index 1f95a06edb7829..32d470e4f73739 100644 --- a/meson.build +++ b/meson.build @@ -1292,6 +1292,8 @@ if host_machine.system() == 'linux' libgit_sources += 'compat/linux/procinfo.c' elif host_machine.system() == 'windows' libgit_sources += 'compat/win32/trace2_win32_process_info.c' +elif host_machine.system() == 'darwin' + libgit_sources += 'compat/darwin/procinfo.c' else libgit_sources += 'compat/stub/procinfo.c' endif From c2a473b2b1b3b79e308a7cbed1eee46fb67aacd8 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Fri, 13 Feb 2026 19:54:57 +0000 Subject: [PATCH 10/24] trace2: refactor Windows process ancestry trace2 event In 353d3d77f4 (trace2: collect Windows-specific process information, 2019-02-22) we added process ancestry information for Windows to TRACE2 via a data_json event. It was only later in 2f732bf15e (tr2: log parent process name, 2021-07-21) that the specific cmd_ancestry event was added to TRACE2. In a future commit we will emit the ancestry information with the newer cmd_ancestry TRACE2 event. Right now, we rework this implementation of trace2_collect_process_info to separate the calculation of ancestors from building and emiting the JSON array via a data_json event. Signed-off-by: Matthew John Cheetham Signed-off-by: Junio C Hamano --- compat/win32/trace2_win32_process_info.c | 50 ++++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/compat/win32/trace2_win32_process_info.c b/compat/win32/trace2_win32_process_info.c index f147da706a240e..aceea054301501 100644 --- a/compat/win32/trace2_win32_process_info.c +++ b/compat/win32/trace2_win32_process_info.c @@ -3,6 +3,7 @@ #include "../../git-compat-util.h" #include "../../json-writer.h" #include "../../repository.h" +#include "../../strvec.h" #include "../../trace2.h" #include "lazyload.h" #include @@ -32,12 +33,7 @@ static int find_pid(DWORD pid, HANDLE hSnapshot, PROCESSENTRY32 *pe32) } /* - * Accumulate JSON array of our parent processes: - * [ - * exe-name-parent, - * exe-name-grand-parent, - * ... - * ] + * Accumulate array of our parent process names. * * Note: we only report the filename of the process executable; the * only way to get its full pathname is to use OpenProcess() @@ -73,7 +69,7 @@ static int find_pid(DWORD pid, HANDLE hSnapshot, PROCESSENTRY32 *pe32) * simple and avoid the alloc/realloc overhead. It is OK if we * truncate the search and return a partial answer. */ -static void get_processes(struct json_writer *jw, HANDLE hSnapshot) +static void get_processes(struct strvec *names, HANDLE hSnapshot) { PROCESSENTRY32 pe32; DWORD pid; @@ -82,19 +78,19 @@ static void get_processes(struct json_writer *jw, HANDLE hSnapshot) pid = GetCurrentProcessId(); while (find_pid(pid, hSnapshot, &pe32)) { - /* Only report parents. Omit self from the JSON output. */ + /* Only report parents. Omit self from the output. */ if (nr_pids) - jw_array_string(jw, pe32.szExeFile); + strvec_push(names, pe32.szExeFile); /* Check for cycle in snapshot. (Yes, it happened.) */ for (k = 0; k < nr_pids; k++) if (pid == pid_list[k]) { - jw_array_string(jw, "(cycle)"); + strvec_push(names, "(cycle)"); return; } if (nr_pids == NR_PIDS_LIMIT) { - jw_array_string(jw, "(truncated)"); + strvec_push(names, "(truncated)"); return; } @@ -105,24 +101,14 @@ static void get_processes(struct json_writer *jw, HANDLE hSnapshot) } /* - * Emit JSON data for the current and parent processes. Individual - * trace2 targets can decide how to actually print it. + * Collect the list of parent process names. */ -static void get_ancestry(void) +static void get_ancestry(struct strvec *names) { HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); if (hSnapshot != INVALID_HANDLE_VALUE) { - struct json_writer jw = JSON_WRITER_INIT; - - jw_array_begin(&jw, 0); - get_processes(&jw, hSnapshot); - jw_end(&jw); - - trace2_data_json("process", the_repository, "windows/ancestry", - &jw); - - jw_release(&jw); + get_processes(names, hSnapshot); CloseHandle(hSnapshot); } } @@ -176,13 +162,27 @@ static void get_peak_memory_info(void) void trace2_collect_process_info(enum trace2_process_info_reason reason) { + struct strvec names = STRVEC_INIT; + if (!trace2_is_enabled()) return; switch (reason) { case TRACE2_PROCESS_INFO_STARTUP: get_is_being_debugged(); - get_ancestry(); + get_ancestry(&names); + if (names.nr) { + struct json_writer jw = JSON_WRITER_INIT; + jw_array_begin(&jw, 0); + for (size_t i = 0; i < names.nr; i++) + jw_array_string(&jw, names.v[i]); + jw_end(&jw); + trace2_data_json("process", the_repository, + "windows/ancestry", &jw); + jw_release(&jw); + } + + strvec_clear(&names); return; case TRACE2_PROCESS_INFO_EXIT: From d7301487a38766109589b02309dac1be6490fa95 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Fri, 13 Feb 2026 19:54:58 +0000 Subject: [PATCH 11/24] trace2: emit cmd_ancestry data for Windows Since 2f732bf15e (tr2: log parent process name, 2021-07-21) it is now now possible to emit a specific process ancestry event in TRACE2. We should emit the Windows process ancestry data with the correct event type. To not break existing consumers of the data_json "windows/ancestry" event, we continue to emit the ancestry data as a JSON event. Signed-off-by: Matthew John Cheetham Signed-off-by: Junio C Hamano --- compat/win32/trace2_win32_process_info.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compat/win32/trace2_win32_process_info.c b/compat/win32/trace2_win32_process_info.c index aceea054301501..6a6a396078899c 100644 --- a/compat/win32/trace2_win32_process_info.c +++ b/compat/win32/trace2_win32_process_info.c @@ -172,6 +172,11 @@ void trace2_collect_process_info(enum trace2_process_info_reason reason) get_is_being_debugged(); get_ancestry(&names); if (names.nr) { + /* + Emit the ancestry data as a data_json event to + maintain compatibility for consumers of the older + "windows/ancestry" event. + */ struct json_writer jw = JSON_WRITER_INIT; jw_array_begin(&jw, 0); for (size_t i = 0; i < names.nr; i++) @@ -180,6 +185,9 @@ void trace2_collect_process_info(enum trace2_process_info_reason reason) trace2_data_json("process", the_repository, "windows/ancestry", &jw); jw_release(&jw); + + /* Emit the ancestry data with the new event. */ + trace2_cmd_ancestry(names.v); } strvec_clear(&names); From b6a125b936bfb05de704243e1fa568d4a9ad9a11 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Fri, 13 Feb 2026 19:54:59 +0000 Subject: [PATCH 12/24] test-tool: extend trace2 helper with 400ancestry Add a new test helper "400ancestry" to the trace2 test-tool that spawns a child process with a controlled trace2 environment, capturing only the child's trace2 output (including cmd_ancestry events) in isolation. The helper clears all inherited GIT_TRACE2* variables in the child and enables only the requested target (normal, perf, or event), directing output to a specified file. This gives the test suite a reliable way to capture cmd_ancestry events: the child always sees "test-tool" as its immediate parent in the process ancestry, providing a predictable value to verify in tests. Signed-off-by: Matthew John Cheetham Signed-off-by: Junio C Hamano --- t/helper/test-trace2.c | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/t/helper/test-trace2.c b/t/helper/test-trace2.c index 415df078c1638a..3b12f4173e4c09 100644 --- a/t/helper/test-trace2.c +++ b/t/helper/test-trace2.c @@ -466,6 +466,63 @@ static int ut_303redact_def_param(int argc, const char **argv) return 0; } +/* + * Run a child process with specific trace2 environment settings so that + * we can capture its trace2 output (including cmd_ancestry) in isolation. + * + * test-tool trace2 400ancestry [] + * + * is one of: normal, perf, event + * + * For example: + * test-tool trace2 400ancestry normal out.normal test-tool trace2 001return 0 + * + * The child process inherits a controlled trace2 environment where only + * the specified target is directed to . The parent's trace2 + * environment variables are cleared in the child so that only the child's + * events are captured. + * + * This is used by t0213-trace2-ancestry.sh to test cmd_ancestry events. + * The child process will see "test-tool" as its immediate parent in the + * process ancestry, giving us a predictable value to verify. + */ +static int ut_400ancestry(int argc, const char **argv) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + const char *target; + const char *outfile; + int result; + + if (argc < 3) + die("expect "); + + target = argv[0]; + outfile = argv[1]; + argv += 2; + argc -= 2; + + /* Clear all trace2 environment variables in the child. */ + strvec_push(&cmd.env, "GIT_TRACE2="); + strvec_push(&cmd.env, "GIT_TRACE2_PERF="); + strvec_push(&cmd.env, "GIT_TRACE2_EVENT="); + strvec_push(&cmd.env, "GIT_TRACE2_BRIEF=1"); + + /* Set only the requested target. */ + if (!strcmp(target, "normal")) + strvec_pushf(&cmd.env, "GIT_TRACE2=%s", outfile); + else if (!strcmp(target, "perf")) + strvec_pushf(&cmd.env, "GIT_TRACE2_PERF=%s", outfile); + else if (!strcmp(target, "event")) + strvec_pushf(&cmd.env, "GIT_TRACE2_EVENT=%s", outfile); + else + die("invalid target '%s', expected: normal, perf, event", + target); + + strvec_pushv(&cmd.args, argv); + result = run_command(&cmd); + exit(result); +} + /* * Usage: * test-tool trace2 @@ -497,6 +554,8 @@ static struct unit_test ut_table[] = { { ut_301redact_child_start, "301redact_child_start", "" }, { ut_302redact_exec, "302redact_exec", " " }, { ut_303redact_def_param, "303redact_def_param", " " }, + + { ut_400ancestry, "400ancestry", " []" }, }; /* clang-format on */ From 3c8c638df66d6a77d280a04321aeb18b2ad61338 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Fri, 13 Feb 2026 19:55:00 +0000 Subject: [PATCH 13/24] t0213: add trace2 cmd_ancestry tests Add a new test script t0213-trace2-ancestry.sh that verifies cmd_ancestry events across all three trace2 output formats (normal, perf, and event). The tests use the "400ancestry" test helper to spawn child processes with controlled trace2 environments. Git alias resolution (which spawns a child git process) creates a predictable multi-level process tree. Filter functions extract cmd_ancestry events from each format, truncating the ancestor list at the outermost "test-tool" so that only the controlled portion of the tree is verified, regardless of the test runner environment. A runtime prerequisite (TRACE2_ANCESTRY) is used to detect whether the platform has a real procinfo implementation; platforms with only the stub are skipped. We must pay attention to an extra ancestor on Windows (MINGW) when running without the bin-wrappers (such as we do in CI). In this situation we see an extra "sh.exe" ancestor after "test-tool.exe". Also update the comment in t0210-trace2-normal.sh to reflect that ancestry testing now has its own dedicated test script. Signed-off-by: Matthew John Cheetham Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t0210-trace2-normal.sh | 5 +- t/t0213-trace2-ancestry.sh | 180 +++++++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 2 deletions(-) create mode 100755 t/t0213-trace2-ancestry.sh diff --git a/t/meson.build b/t/meson.build index a5531df415ffe2..551c3036c0d30b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -131,6 +131,7 @@ integration_tests = [ 't0210-trace2-normal.sh', 't0211-trace2-perf.sh', 't0212-trace2-event.sh', + 't0213-trace2-ancestry.sh', 't0300-credentials.sh', 't0301-credential-cache.sh', 't0302-credential-store.sh', diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh index 96c68f65df209f..7e1e7af862b430 100755 --- a/t/t0210-trace2-normal.sh +++ b/t/t0210-trace2-normal.sh @@ -74,8 +74,9 @@ scrub_normal () { # This line is only emitted when RUNTIME_PREFIX is defined, # so just omit it for testing purposes. # - # 4. 'cmd_ancestry' is not implemented everywhere, so for portability's - # sake, skip it when parsing normal. + # 4. 'cmd_ancestry' output depends on how the test is run and + # is not relevant to the features we are testing here. + # Ancestry tests are covered in t0213-trace2-ancestry.sh instead. sed \ -e 's/elapsed:[0-9]*\.[0-9][0-9]*\([eE][-+]\{0,1\}[0-9][0-9]*\)\{0,1\}/elapsed:_TIME_/g' \ -e "s/^start '[^']*' \(.*\)/start _EXE_ \1/" \ diff --git a/t/t0213-trace2-ancestry.sh b/t/t0213-trace2-ancestry.sh new file mode 100755 index 00000000000000..a2b9536da83152 --- /dev/null +++ b/t/t0213-trace2-ancestry.sh @@ -0,0 +1,180 @@ +#!/bin/sh + +test_description='test trace2 cmd_ancestry event' + +. ./test-lib.sh + +# Turn off any inherited trace2 settings for this test. +sane_unset GIT_TRACE2 GIT_TRACE2_PERF GIT_TRACE2_EVENT +sane_unset GIT_TRACE2_BRIEF +sane_unset GIT_TRACE2_CONFIG_PARAMS + +# Add t/helper directory to PATH so that we can use a relative +# path to run nested instances of test-tool.exe (see 004child). +# This helps with HEREDOC comparisons later. +TTDIR="$GIT_BUILD_DIR/t/helper/" && export TTDIR +PATH="$TTDIR:$PATH" && export PATH + +# The 400ancestry helper spawns a child process so that the child +# sees "test-tool" in its process ancestry. We capture only the +# child's trace2 output to a file. +# +# The tests use git commands that spawn child git processes (e.g., +# alias resolution) to create a controlled multi-level process tree. +# Because cmd_ancestry walks the real process tree, processes will +# also report ancestors above "test-tool" that depend on the test +# runner environment (e.g., bash, make, tmux). The filter functions +# below truncate the ancestry at "test-tool", discarding anything +# above it, so only the controlled portion is verified. +# +# On platforms without a real procinfo implementation (the stub), +# no cmd_ancestry event is emitted. We detect this at runtime and +# skip the format-specific tests accordingly. + +# Determine if cmd_ancestry is supported on this platform. +test_expect_success 'detect cmd_ancestry support' ' + test_when_finished "rm -f trace.detect" && + GIT_TRACE2_BRIEF=1 GIT_TRACE2="$(pwd)/trace.detect" \ + test-tool trace2 001return 0 && + if grep -q "^cmd_ancestry" trace.detect + then + test_set_prereq TRACE2_ANCESTRY + fi +' + +# Filter functions for each trace2 target format. +# +# Each extracts cmd_ancestry events, strips format-specific syntax, +# and truncates the ancestor list at the outermost "test-tool" +# (or "test-tool.exe" on Windows), discarding any higher-level +# (uncontrolled) ancestors. +# +# Output is a space-separated list of ancestor names, one line per +# cmd_ancestry event, with the immediate parent listed first: +# +# test-tool (or: test-tool.exe) +# git test-tool (or: git.exe test-tool.exe) +# git test-tool test-tool (or: git.exe test-tool.exe test-tool.exe) + +if test_have_prereq MINGW +then + TT=test-tool$X +else + TT=test-tool +fi + +filter_ancestry_normal () { + sed -n '/^cmd_ancestry/{ + s/^cmd_ancestry // + s/ <- / /g + s/\(.*'"$TT"'\) .*/\1/ + p + }' +} + +filter_ancestry_perf () { + sed -n '/cmd_ancestry/{ + s/.*ancestry:\[// + s/\]// + s/\(.*'"$TT"'\) .*/\1/ + p + }' +} + +filter_ancestry_event () { + sed -n '/"cmd_ancestry"/{ + s/.*"ancestry":\[// + s/\].*// + s/"//g + s/,/ /g + s/\(.*'"$TT"'\) .*/\1/ + p + }' +} + +# On Windows (MINGW) when running with the bin-wrappers, we also see "sh.exe" in +# the ancestry. We must therefore account for this expected ancestry element in +# the expected output of the tests. +if test_have_prereq MINGW && test -z "$no_bin_wrappers"; then + SH_TT="sh$X $TT" +else + SH_TT="$TT" +fi + +# Git alias resolution spawns the target command as a child process. +# Using "git -c alias.xyz=version xyz" creates a two-level chain: +# +# test-tool (400ancestry) +# -> git (resolves alias xyz -> version) +# -> git (version) +# +# Both git processes are instrumented and emit cmd_ancestry. After +# filtering out ancestors above test-tool, we get: +# +# test-tool (from git alias resolver) +# git test-tool (from git version) + +test_expect_success TRACE2_ANCESTRY 'normal: git alias chain, 2 levels' ' + test_when_finished "rm -f trace.normal actual expect" && + test-tool trace2 400ancestry normal "$(pwd)/trace.normal" \ + git -c alias.xyz=version xyz && + filter_ancestry_normal actual && + cat >expect <<-EOF && + $SH_TT + git$X $SH_TT + EOF + test_cmp expect actual +' + +test_expect_success TRACE2_ANCESTRY 'perf: git alias chain, 2 levels' ' + test_when_finished "rm -f trace.perf actual expect" && + test-tool trace2 400ancestry perf "$(pwd)/trace.perf" \ + git -c alias.xyz=version xyz && + filter_ancestry_perf actual && + cat >expect <<-EOF && + $SH_TT + git$X $SH_TT + EOF + test_cmp expect actual +' + +test_expect_success TRACE2_ANCESTRY 'event: git alias chain, 2 levels' ' + test_when_finished "rm -f trace.event actual expect" && + test-tool trace2 400ancestry event "$(pwd)/trace.event" \ + git -c alias.xyz=version xyz && + filter_ancestry_event actual && + cat >expect <<-EOF && + $SH_TT + git$X $SH_TT + EOF + test_cmp expect actual +' + +# Use 004child to add a test-tool layer, creating a three-level chain: +# +# test-tool (400ancestry) +# -> test-tool (004child) +# -> git (resolves alias xyz -> version) +# -> git (version) +# +# Three instrumented processes emit cmd_ancestry. After filtering: +# +# test-tool (from test-tool 004child) +# test-tool test-tool (from git alias resolver) +# git test-tool test-tool (from git version) + +test_expect_success TRACE2_ANCESTRY 'normal: deeper chain, 3 levels' ' + test_when_finished "rm -f trace.normal actual expect" && + test-tool trace2 400ancestry normal "$(pwd)/trace.normal" \ + test-tool trace2 004child \ + git -c alias.xyz=version xyz && + filter_ancestry_normal actual && + cat >expect <<-EOF && + $TT + $SH_TT $TT + git$X $SH_TT $TT + EOF + test_cmp expect actual +' + +test_done From aa94ba7d80c3b917a507f6975bd6400436fcd9e6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 12 Feb 2026 13:22:56 -0800 Subject: [PATCH 14/24] CodingGuidelines: document NEEDSWORK comments We often say things like /* NEEDSWORK: further _do_ _this_ */ in comments, but it is a short-hand to say "We might later want to do this. We might not. We do not have to decide it right now at this moment in the commit this comment was added. If somebody is inclined to work in this area further, the first thing they need to do is to figure out if it truly makes sense to do so, before blindly doing it." This seems to have never been documented. Do so now. Signed-off-by: Junio C Hamano --- Documentation/CodingGuidelines | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines index df72fe01772a18..d071f8e69f5259 100644 --- a/Documentation/CodingGuidelines +++ b/Documentation/CodingGuidelines @@ -33,6 +33,16 @@ Git in general, a few rough rules are: achieve and why the changes were necessary (more on this in the accompanying SubmittingPatches document). + - A label "NEEDSWORK:" followed by a description of the things to + be done is a way to leave in-code comments to document design + decisions yet to be made. 80% of the work to resolve a NEEDSWORK + comment is to decide if it still makes sense to do so, since the + situation around the codebase may have changed since the comment + was written. It can be a very valid change to remove an existing + NEEDSWORK comment without doing anything else, with the commit log + message describing a good argument why it does not make sense to do + the thing the NEEDSWORK comment mentioned. + Make your code readable and sensible, and don't try to be clever. As for more concrete guidelines, just imitate the existing code From a7d430d5b50bb37adae783ec29539cb76cbcc406 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:07 +0100 Subject: [PATCH 15/24] promisor-remote: refactor initialising field lists In "promisor-remote.c", the fields_sent() and fields_checked() functions serve similar purposes and contain a small amount of duplicated code. As we are going to add a similar function in a following commit, let's refactor this common code into a new initialize_fields_list() function. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 77ebf537e2b3ee..5d8151cedb59ff 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -375,18 +375,24 @@ static char *fields_from_config(struct string_list *fields_list, const char *con return fields; } +static struct string_list *initialize_fields_list(struct string_list *fields_list, int *initialized, + const char *config_key) +{ + if (!*initialized) { + fields_list->cmp = strcasecmp; + fields_from_config(fields_list, config_key); + *initialized = 1; + } + + return fields_list; +} + static struct string_list *fields_sent(void) { static struct string_list fields_list = STRING_LIST_INIT_NODUP; static int initialized; - if (!initialized) { - fields_list.cmp = strcasecmp; - fields_from_config(&fields_list, "promisor.sendFields"); - initialized = 1; - } - - return &fields_list; + return initialize_fields_list(&fields_list, &initialized, "promisor.sendFields"); } static struct string_list *fields_checked(void) @@ -394,13 +400,7 @@ static struct string_list *fields_checked(void) static struct string_list fields_list = STRING_LIST_INIT_NODUP; static int initialized; - if (!initialized) { - fields_list.cmp = strcasecmp; - fields_from_config(&fields_list, "promisor.checkFields"); - initialized = 1; - } - - return &fields_list; + return initialize_fields_list(&fields_list, &initialized, "promisor.checkFields"); } /* From 3e20258d11f188189c18bb6c4f2b2eee47abf03d Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:08 +0100 Subject: [PATCH 16/24] promisor-remote: allow a client to store fields A previous commit allowed a server to pass additional fields through the "promisor-remote" protocol capability after the "name" and "url" fields, specifically the "partialCloneFilter" and "token" fields. Another previous commit, c213820c51 (promisor-remote: allow a client to check fields, 2025-09-08), has made it possible for a client to decide if it accepts a promisor remote advertised by a server based on these additional fields. Often though, it would be interesting for the client to just store in its configuration files these additional fields passed by the server, so that it can use them when needed. For example if a token is necessary to access a promisor remote, that token could be updated frequently only on the server side and then passed to all the clients through the "promisor-remote" capability, avoiding the need to update it on all the clients manually. Storing the token on the client side makes sure that the token is available when the client needs to access the promisor remotes for a lazy fetch. To allow this, let's introduce a new "promisor.storeFields" configuration variable. Note that for a partial clone filter, it's less interesting to have it stored on the client. This is because a filter should be used right away and we already pass a `--filter=` option to `git clone` when starting a partial clone. Storing the filter could perhaps still be interesting for information purposes. Like "promisor.checkFields" and "promisor.sendFields", the new configuration variable should contain a comma or space separated list of field names. Only the "partialCloneFilter" and "token" field names are supported for now. When a server advertises a promisor remote, for example "foo", along with for example "token=XXXXX" to a client, and on the client side "promisor.storeFields" contains "token", then the client will store XXXXX for the "remote.foo.token" variable in its configuration file and reload its configuration so it can immediately use this new configuration variable. A message is emitted on stderr to warn users when the config is changed. Note that even if "promisor.acceptFromServer" is set to "all", a promisor remote has to be already configured on the client side for some of its config to be changed. In any case no new remote is configured and no new URL is stored. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/config/promisor.adoc | 33 ++++++ Documentation/gitprotocol-v2.adoc | 12 ++- promisor-remote.c | 148 +++++++++++++++++++++++++- t/t5710-promisor-remote-capability.sh | 63 +++++++++++ 4 files changed, 250 insertions(+), 6 deletions(-) diff --git a/Documentation/config/promisor.adoc b/Documentation/config/promisor.adoc index 93e5e0d9b55eb4..b0fa43b8393a53 100644 --- a/Documentation/config/promisor.adoc +++ b/Documentation/config/promisor.adoc @@ -89,3 +89,36 @@ variable. The fields are checked only if the `promisor.acceptFromServer` config variable is not set to "None". If set to "None", this config variable has no effect. See linkgit:gitprotocol-v2[5]. + +promisor.storeFields:: + A comma or space separated list of additional remote related + field names. If a client accepts an advertised remote, the + client will store the values associated with these field names + taken from the remote advertisement into its configuration, + and then reload its remote configuration. Currently, + "partialCloneFilter" and "token" are the only supported field + names. ++ +For example if a server advertises "partialCloneFilter=blob:limit=20k" +for remote "foo", and that remote is accepted, then "blob:limit=20k" +will be stored for the "remote.foo.partialCloneFilter" configuration +variable. ++ +If the new field value from an advertised remote is the same as the +existing field value for that remote on the client side, then no +change is made to the client configuration though. ++ +When a new value is stored, a message is printed to standard error to +let users know about this. ++ +Note that for security reasons, if the remote is not already +configured on the client side, nothing will be stored for that +remote. In any case, no new remote will be created and no URL will be +stored. ++ +Before storing a partial clone filter, it's parsed to check it's +valid. If it's not, a warning is emitted and it's not stored. ++ +Before storing a token, a check is performed to ensure it contains no +control character. If the check fails, a warning is emitted and it's +not stored. diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index c7db103299ae54..d93dd279ead7b2 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -826,9 +826,10 @@ are case-sensitive and MUST be transmitted exactly as specified above. Clients MUST ignore fields they don't recognize to allow for future protocol extensions. -For now, the client can only use information transmitted through these -fields to decide if it accepts the advertised promisor remote. In the -future that information might be used for other purposes though. +The client can use information transmitted through these fields to +decide if it accepts the advertised promisor remote. Also, the client +can be configured to store the values of these fields (see +"promisor.storeFields" in linkgit:git-config[1]). Field values MUST be urlencoded. @@ -856,8 +857,9 @@ the server advertised, the client shouldn't advertise the On the server side, the "promisor.advertise" and "promisor.sendFields" configuration options can be used to control what it advertises. On the client side, the "promisor.acceptFromServer" configuration option -can be used to control what it accepts. See the documentation of these -configuration options for more information. +can be used to control what it accepts, and the "promisor.storeFields" +option, to control what it stores. See the documentation of these +configuration options in linkgit:git-config[1] for more information. Note that in the future it would be nice if the "promisor-remote" protocol capability could be used by the server, when responding to diff --git a/promisor-remote.c b/promisor-remote.c index 5d8151cedb59ff..59997dd4c78812 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -403,6 +403,14 @@ static struct string_list *fields_checked(void) return initialize_fields_list(&fields_list, &initialized, "promisor.checkFields"); } +static struct string_list *fields_stored(void) +{ + static struct string_list fields_list = STRING_LIST_INIT_NODUP; + static int initialized; + + return initialize_fields_list(&fields_list, &initialized, "promisor.storeFields"); +} + /* * Struct for promisor remotes involved in the "promisor-remote" * protocol capability. @@ -692,6 +700,132 @@ static struct promisor_info *parse_one_advertised_remote(const char *remote_info return info; } +static bool store_one_field(struct repository *repo, const char *remote_name, + const char *field_name, const char *field_key, + const char *advertised, const char *current) +{ + if (advertised && (!current || strcmp(current, advertised))) { + char *key = xstrfmt("remote.%s.%s", remote_name, field_key); + + fprintf(stderr, _("Storing new %s from server for remote '%s'.\n" + " '%s' -> '%s'\n"), + field_name, remote_name, + current ? current : "", + advertised); + + repo_config_set_gently(repo, key, advertised); + free(key); + + return true; + } + + return false; +} + +/* Check that a filter is valid by parsing it */ +static bool valid_filter(const char *filter, const char *remote_name) +{ + struct list_objects_filter_options filter_opts = LIST_OBJECTS_FILTER_INIT; + struct strbuf err = STRBUF_INIT; + int res = gently_parse_list_objects_filter(&filter_opts, filter, &err); + + if (res) + warning(_("invalid filter '%s' for remote '%s' " + "will not be stored: %s"), + filter, remote_name, err.buf); + + list_objects_filter_release(&filter_opts); + strbuf_release(&err); + + return !res; +} + +/* Check that a token doesn't contain any control character */ +static bool valid_token(const char *token, const char *remote_name) +{ + const char *c = token; + + for (; *c; c++) + if (iscntrl(*c)) { + warning(_("invalid token '%s' for remote '%s' " + "will not be stored"), + token, remote_name); + return false; + } + + return true; +} + +struct store_info { + struct repository *repo; + struct string_list config_info; + bool store_filter; + bool store_token; +}; + +static struct store_info *store_info_new(struct repository *repo) +{ + struct string_list *fields_to_store = fields_stored(); + struct store_info *s = xmalloc(sizeof(*s)); + + s->repo = repo; + + string_list_init_nodup(&s->config_info); + promisor_config_info_list(repo, &s->config_info, fields_to_store); + string_list_sort(&s->config_info); + + s->store_filter = !!string_list_lookup(fields_to_store, promisor_field_filter); + s->store_token = !!string_list_lookup(fields_to_store, promisor_field_token); + + return s; +} + +static void store_info_free(struct store_info *s) +{ + if (s) { + promisor_info_list_clear(&s->config_info); + free(s); + } +} + +static bool promisor_store_advertised_fields(struct promisor_info *advertised, + struct store_info *store_info) +{ + struct promisor_info *p; + struct string_list_item *item; + const char *remote_name = advertised->name; + bool reload_config = false; + + if (!(store_info->store_filter || store_info->store_token)) + return false; + + /* + * Get existing config info for the advertised promisor + * remote. This ensures the remote is already configured on + * the client side. + */ + item = string_list_lookup(&store_info->config_info, remote_name); + + if (!item) + return false; + + p = item->util; + + if (store_info->store_filter && advertised->filter && + valid_filter(advertised->filter, remote_name)) + reload_config |= store_one_field(store_info->repo, remote_name, + "filter", promisor_field_filter, + advertised->filter, p->filter); + + if (store_info->store_token && advertised->token && + valid_token(advertised->token, remote_name)) + reload_config |= store_one_field(store_info->repo, remote_name, + "token", promisor_field_token, + advertised->token, p->token); + + return reload_config; +} + static void filter_promisor_remote(struct repository *repo, struct strvec *accepted, const char *info) @@ -700,7 +834,9 @@ static void filter_promisor_remote(struct repository *repo, enum accept_promisor accept = ACCEPT_NONE; struct string_list config_info = STRING_LIST_INIT_NODUP; struct string_list remote_info = STRING_LIST_INIT_DUP; + struct store_info *store_info = NULL; struct string_list_item *item; + bool reload_config = false; if (!repo_config_get_string_tmp(the_repository, "promisor.acceptfromserver", &accept_str)) { if (!*accept_str || !strcasecmp("None", accept_str)) @@ -736,14 +872,24 @@ static void filter_promisor_remote(struct repository *repo, string_list_sort(&config_info); } - if (should_accept_remote(accept, advertised, &config_info)) + if (should_accept_remote(accept, advertised, &config_info)) { + if (!store_info) + store_info = store_info_new(repo); + if (promisor_store_advertised_fields(advertised, store_info)) + reload_config = true; + strvec_push(accepted, advertised->name); + } promisor_info_free(advertised); } promisor_info_list_clear(&config_info); string_list_clear(&remote_info, 0); + store_info_free(store_info); + + if (reload_config) + repo_promisor_remote_reinit(repo); } char *promisor_remote_reply(const char *info) diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index 023735d6a84ea8..6ef6431bd7dd25 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -360,6 +360,69 @@ test_expect_success "clone with promisor.checkFields" ' check_missing_objects server 1 "$oid" ' +test_expect_success "clone with promisor.storeFields=partialCloneFilter" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + git -C server remote add otherLop "https://invalid.invalid" && + git -C server config remote.otherLop.token "fooBar" && + git -C server config remote.otherLop.stuff "baz" && + git -C server config remote.otherLop.partialCloneFilter "blob:limit=10k" && + test_when_finished "git -C server remote remove otherLop" && + + git -C server config remote.lop.token "fooXXX" && + git -C server config remote.lop.partialCloneFilter "blob:limit=8k" && + + test_config -C server promisor.sendFields "partialCloneFilter, token" && + test_when_finished "rm trace" && + + # Clone from server to create a client + GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ + -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.token="fooYYY" \ + -c remote.lop.partialCloneFilter="blob:none" \ + -c promisor.acceptfromserver=All \ + -c promisor.storeFields=partialcloneFilter \ + --no-local --filter="blob:limit=5k" server client 2>err && + + # Check that the filter from the server is stored + echo "blob:limit=8k" >expected && + git -C client config remote.lop.partialCloneFilter >actual && + test_cmp expected actual && + + # Check that user is notified when the filter is stored + test_grep "Storing new filter from server for remote '\''lop'\''" err && + test_grep "'\''blob:none'\'' -> '\''blob:limit=8k'\''" err && + + # Check that the token from the server is NOT stored + echo "fooYYY" >expected && + git -C client config remote.lop.token >actual && + test_cmp expected actual && + test_grep ! "Storing new token from server" err && + + # Check that the filter for an unknown remote is NOT stored + test_must_fail git -C client config remote.otherLop.partialCloneFilter >actual && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" && + + # Change the configuration on the server and fetch from the client + git -C server config remote.lop.partialCloneFilter "blob:limit=7k" && + GIT_NO_LAZY_FETCH=0 git -C client fetch \ + --filter="blob:limit=5k" ../server 2>err && + + # Check that the fetch updated the configuration on the client + echo "blob:limit=7k" >expected && + git -C client config remote.lop.partialCloneFilter >actual && + test_cmp expected actual && + + # Check that user is notified when the new filter is stored + test_grep "Storing new filter from server for remote '\''lop'\''" err && + test_grep "'\''blob:limit=8k'\'' -> '\''blob:limit=7k'\''" err +' + test_expect_success "clone with promisor.advertise set to 'true' but don't delete the client" ' git -C server config promisor.advertise true && From fe5335974323da5e829676735cc32d89422d58ba Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:09 +0100 Subject: [PATCH 17/24] clone: make filter_options local to cmd_clone() The `struct list_objects_filter_options filter_options` variable used in "builtin/clone.c" to store the parsed filters specified by `--filter=` is currently a static variable global to the file. As we are going to use it more in a following commit, it could become a bit less easy to understand how it's managed. To avoid that, let's make it clear that it's owned by cmd_clone() by moving its definition into that function and making it non-static. The only additional change to make this work is to pass it as an argument to checkout(). So it's a small quite cheap cleanup anyway. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/clone.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index b14a39a687c840..bb27472020dc46 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -77,7 +77,6 @@ static struct string_list option_required_reference = STRING_LIST_INIT_NODUP; static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; -static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; static int config_filter_submodules = -1; /* unspecified */ static int option_remote_submodules; @@ -634,7 +633,9 @@ static int git_sparse_checkout_init(const char *repo) return result; } -static int checkout(int submodule_progress, int filter_submodules, +static int checkout(int submodule_progress, + struct list_objects_filter_options *filter_options, + int filter_submodules, enum ref_storage_format ref_storage_format) { struct object_id oid; @@ -723,9 +724,9 @@ static int checkout(int submodule_progress, int filter_submodules, strvec_pushf(&cmd.args, "--ref-format=%s", ref_storage_format_to_name(ref_storage_format)); - if (filter_submodules && filter_options.choice) + if (filter_submodules && filter_options->choice) strvec_pushf(&cmd.args, "--filter=%s", - expand_list_objects_filter_spec(&filter_options)); + expand_list_objects_filter_spec(filter_options)); if (option_single_branch >= 0) strvec_push(&cmd.args, option_single_branch ? @@ -903,6 +904,7 @@ int cmd_clone(int argc, enum transport_family family = TRANSPORT_FAMILY_ALL; struct string_list option_config = STRING_LIST_INIT_DUP; int option_dissociate = 0; + struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; int option_filter_submodules = -1; /* unspecified */ struct string_list server_options = STRING_LIST_INIT_NODUP; const char *bundle_uri = NULL; @@ -1624,9 +1626,13 @@ int cmd_clone(int argc, return 1; junk_mode = JUNK_LEAVE_REPO; - err = checkout(submodule_progress, filter_submodules, + err = checkout(submodule_progress, + &filter_options, + filter_submodules, ref_storage_format); + list_objects_filter_release(&filter_options); + string_list_clear(&option_not, 0); string_list_clear(&option_config, 0); string_list_clear(&server_options, 0); From f7565410e1f803873c097258109ff0258ad913fc Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:10 +0100 Subject: [PATCH 18/24] fetch: make filter_options local to cmd_fetch() The `struct list_objects_filter_options filter_options` variable used in "builtin/fetch.c" to store the parsed filters specified by `--filter=` is currently a static variable global to the file. As we are going to use it more in a following commit, it could become a bit less easy to understand how it's managed. To avoid that, let's make it clear that it's owned by cmd_fetch() by moving its definition into that function and making it non-static. This requires passing a pointer to it through the prepare_transport(), do_fetch(), backfill_tags(), fetch_one_setup_partial(), and fetch_one() functions, but it's quite straightforward. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fetch.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index a3bc7e9380b9b6..8fbf3557ceba3b 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -97,7 +97,6 @@ static struct strbuf default_rla = STRBUF_INIT; static struct transport *gtransport; static struct transport *gsecondary; static struct refspec refmap = REFSPEC_INIT_FETCH; -static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; static struct string_list server_options = STRING_LIST_INIT_DUP; static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP; @@ -1562,7 +1561,8 @@ static void add_negotiation_tips(struct git_transport_options *smart_options) smart_options->negotiation_tips = oids; } -static struct transport *prepare_transport(struct remote *remote, int deepen) +static struct transport *prepare_transport(struct remote *remote, int deepen, + struct list_objects_filter_options *filter_options) { struct transport *transport; @@ -1586,9 +1586,9 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); if (refetch) set_option(transport, TRANS_OPT_REFETCH, "yes"); - if (filter_options.choice) { + if (filter_options->choice) { const char *spec = - expand_list_objects_filter_spec(&filter_options); + expand_list_objects_filter_spec(filter_options); set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec); set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); } @@ -1607,7 +1607,8 @@ static int backfill_tags(struct display_state *display_state, struct ref *ref_map, struct fetch_head *fetch_head, const struct fetch_config *config, - struct ref_update_display_info_array *display_array) + struct ref_update_display_info_array *display_array, + struct list_objects_filter_options *filter_options) { int retcode, cannot_reuse; @@ -1621,7 +1622,7 @@ static int backfill_tags(struct display_state *display_state, cannot_reuse = transport->cannot_reuse || deepen_since || deepen_not.nr; if (cannot_reuse) { - gsecondary = prepare_transport(transport->remote, 0); + gsecondary = prepare_transport(transport->remote, 0, filter_options); transport = gsecondary; } @@ -1834,7 +1835,8 @@ static int commit_ref_transaction(struct ref_transaction **transaction, static int do_fetch(struct transport *transport, struct refspec *rs, - const struct fetch_config *config) + const struct fetch_config *config, + struct list_objects_filter_options *filter_options) { struct ref_transaction *transaction = NULL; struct ref *ref_map = NULL; @@ -1997,7 +1999,7 @@ static int do_fetch(struct transport *transport, * the transaction and don't commit anything. */ if (backfill_tags(&display_state, transport, transaction, tags_ref_map, - &fetch_head, config, &display_array)) + &fetch_head, config, &display_array, filter_options)) retcode = 1; } @@ -2339,20 +2341,21 @@ static int fetch_multiple(struct string_list *list, int max_children, * Fetching from the promisor remote should use the given filter-spec * or inherit the default filter-spec from the config. */ -static inline void fetch_one_setup_partial(struct remote *remote) +static inline void fetch_one_setup_partial(struct remote *remote, + struct list_objects_filter_options *filter_options) { /* * Explicit --no-filter argument overrides everything, regardless * of any prior partial clones and fetches. */ - if (filter_options.no_filter) + if (filter_options->no_filter) return; /* * If no prior partial clone/fetch and the current fetch DID NOT * request a partial-fetch, do a normal fetch. */ - if (!repo_has_promisor_remote(the_repository) && !filter_options.choice) + if (!repo_has_promisor_remote(the_repository) && !filter_options->choice) return; /* @@ -2361,8 +2364,8 @@ static inline void fetch_one_setup_partial(struct remote *remote) * filter-spec as the default for subsequent fetches to this * remote if there is currently no default filter-spec. */ - if (filter_options.choice) { - partial_clone_register(remote->name, &filter_options); + if (filter_options->choice) { + partial_clone_register(remote->name, filter_options); return; } @@ -2371,14 +2374,15 @@ static inline void fetch_one_setup_partial(struct remote *remote) * explicitly given filter-spec or inherit the filter-spec from * the config. */ - if (!filter_options.choice) - partial_clone_get_default_filter_spec(&filter_options, remote->name); + if (!filter_options->choice) + partial_clone_get_default_filter_spec(filter_options, remote->name); return; } static int fetch_one(struct remote *remote, int argc, const char **argv, int prune_tags_ok, int use_stdin_refspecs, - const struct fetch_config *config) + const struct fetch_config *config, + struct list_objects_filter_options *filter_options) { struct refspec rs = REFSPEC_INIT_FETCH; int i; @@ -2390,7 +2394,7 @@ static int fetch_one(struct remote *remote, int argc, const char **argv, die(_("no remote repository specified; please specify either a URL or a\n" "remote name from which new revisions should be fetched")); - gtransport = prepare_transport(remote, 1); + gtransport = prepare_transport(remote, 1, filter_options); if (prune < 0) { /* no command line request */ @@ -2445,7 +2449,7 @@ static int fetch_one(struct remote *remote, int argc, const char **argv, sigchain_push_common(unlock_pack_on_signal); atexit(unlock_pack_atexit); sigchain_push(SIGPIPE, SIG_IGN); - exit_code = do_fetch(gtransport, &rs, config); + exit_code = do_fetch(gtransport, &rs, config, filter_options); sigchain_pop(SIGPIPE); refspec_clear(&rs); transport_disconnect(gtransport); @@ -2470,6 +2474,7 @@ int cmd_fetch(int argc, const char *submodule_prefix = ""; const char *bundle_uri; struct string_list list = STRING_LIST_INIT_DUP; + struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; struct remote *remote = NULL; int all = -1, multiple = 0; int result = 0; @@ -2735,7 +2740,7 @@ int cmd_fetch(int argc, trace2_region_enter("fetch", "negotiate-only", the_repository); if (!remote) die(_("must supply remote when using --negotiate-only")); - gtransport = prepare_transport(remote, 1); + gtransport = prepare_transport(remote, 1, &filter_options); if (gtransport->smart_options) { gtransport->smart_options->acked_commits = &acked_commits; } else { @@ -2757,12 +2762,12 @@ int cmd_fetch(int argc, } else if (remote) { if (filter_options.choice || repo_has_promisor_remote(the_repository)) { trace2_region_enter("fetch", "setup-partial", the_repository); - fetch_one_setup_partial(remote); + fetch_one_setup_partial(remote, &filter_options); trace2_region_leave("fetch", "setup-partial", the_repository); } trace2_region_enter("fetch", "fetch-one", the_repository); result = fetch_one(remote, argc, argv, prune_tags_ok, stdin_refspecs, - &config); + &config, &filter_options); trace2_region_leave("fetch", "fetch-one", the_repository); } else { int max_children = max_jobs; @@ -2868,5 +2873,6 @@ int cmd_fetch(int argc, cleanup: string_list_clear(&list, 0); + list_objects_filter_release(&filter_options); return result; } From 190438b62fa5624077c2ee48065f47e1a56d01a7 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:11 +0100 Subject: [PATCH 19/24] doc: fetch: document `--filter=` option The `--filter=` option is documented in most commands that support it except `git fetch`. Let's fix that and document this option. To ensure consistency across commands, let's reuse the exact description currently found in `git clone`. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/fetch-options.adoc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index fcba46ee9e5d61..1ef9807d00b55a 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -88,6 +88,16 @@ linkgit:git-config[1]. This is incompatible with `--recurse-submodules=(yes|on-demand)` and takes precedence over the `fetch.output` config option. +`--filter=`:: + Use the partial clone feature and request that the server sends + a subset of reachable objects according to a given object filter. + When using `--filter`, the supplied __ is used for + the partial fetch. For example, `--filter=blob:none` will filter + out all blobs (file contents) until needed by Git. Also, + `--filter=blob:limit=` will filter out all blobs of size + at least __. For more details on filter specifications, see + the `--filter` option in linkgit:git-rev-list[1]. + ifndef::git-pull[] `--write-fetch-head`:: `--no-write-fetch-head`:: From cd1a89838ad6753830afeed4ad8319e567b6e43d Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:12 +0100 Subject: [PATCH 20/24] list-objects-filter-options: support 'auto' mode for --filter In a following commit, we are going to allow passing "auto" as a to the `--filter=` option, but only for some commands. Other commands that support the `--filter=` option should still die() when 'auto' is passed. Let's set up the "list-objects-filter-options.{c,h}" infrastructure to support that: - Add a new `unsigned int allow_auto_filter : 1;` flag to `struct list_objects_filter_options` which specifies if "auto" is accepted or not by the current command. - Change gently_parse_list_objects_filter() to parse "auto" if it's accepted. - Make sure we die() if "auto" is combined with another filter. - Update list_objects_filter_release() to preserve the allow_auto_filter flag, as this function is often called (via opt_parse_list_objects_filter) to reset the struct before parsing a new value. Let's also update `list-objects-filter.c` to recognize the new `LOFC_AUTO` choice. Since "auto" must be resolved to a concrete filter before filtering actually begins, initializing a filter with `LOFC_AUTO` is invalid and will trigger a BUG(). Note that ideally combining "auto" with "auto" could be allowed, but in practice, it's probably not worth the added code complexity. And if we really want it, nothing prevents us to allow it in future work. If we ever want to give a meaning to combining "auto" with a different filter too, nothing prevents us to do that in future work either. Also note that the new `allow_auto_filter` flag depends on the command, not user choices, so it should be reset to the command default when `struct list_objects_filter_options` instances are reset. While at it, let's add a new "u-list-objects-filter-options.c" file for `struct list_objects_filter_options` related unit tests. For now it only tests gently_parse_list_objects_filter() though. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Makefile | 1 + list-objects-filter-options.c | 39 ++++++++++++-- list-objects-filter-options.h | 6 +++ list-objects-filter.c | 8 +++ t/meson.build | 1 + t/unit-tests/u-list-objects-filter-options.c | 53 ++++++++++++++++++++ 6 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 t/unit-tests/u-list-objects-filter-options.c diff --git a/Makefile b/Makefile index 4ac44331ea1ec0..9e174dd06c2d27 100644 --- a/Makefile +++ b/Makefile @@ -1518,6 +1518,7 @@ CLAR_TEST_SUITES += u-dir CLAR_TEST_SUITES += u-example-decorate CLAR_TEST_SUITES += u-hash CLAR_TEST_SUITES += u-hashmap +CLAR_TEST_SUITES += u-list-objects-filter-options CLAR_TEST_SUITES += u-mem-pool CLAR_TEST_SUITES += u-oid-array CLAR_TEST_SUITES += u-oidmap diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 7420bf81fe0103..7f3e7b8f505ebc 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -20,6 +20,8 @@ const char *list_object_filter_config_name(enum list_objects_filter_choice c) case LOFC_DISABLED: /* we have no name for "no filter at all" */ break; + case LOFC_AUTO: + return "auto"; case LOFC_BLOB_NONE: return "blob:none"; case LOFC_BLOB_LIMIT: @@ -52,7 +54,16 @@ int gently_parse_list_objects_filter( if (filter_options->choice) BUG("filter_options already populated"); - if (!strcmp(arg, "blob:none")) { + if (!strcmp(arg, "auto")) { + if (!filter_options->allow_auto_filter) { + strbuf_addstr(errbuf, + _("'auto' filter not supported by this command")); + return 1; + } + filter_options->choice = LOFC_AUTO; + return 0; + + } else if (!strcmp(arg, "blob:none")) { filter_options->choice = LOFC_BLOB_NONE; return 0; @@ -146,10 +157,22 @@ static int parse_combine_subfilter( decoded = url_percent_decode(subspec->buf); - result = has_reserved_character(subspec, errbuf) || - gently_parse_list_objects_filter( + result = has_reserved_character(subspec, errbuf); + if (result) + goto cleanup; + + result = gently_parse_list_objects_filter( &filter_options->sub[new_index], decoded, errbuf); + if (result) + goto cleanup; + + result = (filter_options->sub[new_index].choice == LOFC_AUTO); + if (result) { + strbuf_addstr(errbuf, _("an 'auto' filter cannot be combined")); + goto cleanup; + } +cleanup: free(decoded); return result; } @@ -263,6 +286,9 @@ void parse_list_objects_filter( } else { struct list_objects_filter_options *sub; + if (filter_options->choice == LOFC_AUTO) + die(_("an 'auto' filter is incompatible with any other filter")); + /* * Make filter_options an LOFC_COMBINE spec so we can trivially * add subspecs to it. @@ -277,6 +303,9 @@ void parse_list_objects_filter( if (gently_parse_list_objects_filter(sub, arg, &errbuf)) die("%s", errbuf.buf); + if (sub->choice == LOFC_AUTO) + die(_("an 'auto' filter is incompatible with any other filter")); + strbuf_addch(&filter_options->filter_spec, '+'); filter_spec_append_urlencode(filter_options, arg); } @@ -317,15 +346,19 @@ void list_objects_filter_release( struct list_objects_filter_options *filter_options) { size_t sub; + unsigned int allow_auto_filter; if (!filter_options) return; + + allow_auto_filter = filter_options->allow_auto_filter; strbuf_release(&filter_options->filter_spec); free(filter_options->sparse_oid_name); for (sub = 0; sub < filter_options->sub_nr; sub++) list_objects_filter_release(&filter_options->sub[sub]); free(filter_options->sub); list_objects_filter_init(filter_options); + filter_options->allow_auto_filter = allow_auto_filter; } void partial_clone_register( diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 7b2108b98662c9..77d7bbc846e1c1 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -18,6 +18,7 @@ enum list_objects_filter_choice { LOFC_SPARSE_OID, LOFC_OBJECT_TYPE, LOFC_COMBINE, + LOFC_AUTO, LOFC__COUNT /* must be last */ }; @@ -50,6 +51,11 @@ struct list_objects_filter_options { */ unsigned int no_filter : 1; + /* + * Is LOFC_AUTO a valid option? + */ + unsigned int allow_auto_filter : 1; + /* * BEGIN choice-specific parsed values from within the filter-spec. Only * some values will be defined for any given choice. diff --git a/list-objects-filter.c b/list-objects-filter.c index acd65ebb734523..78316e7f90c8d4 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -745,6 +745,13 @@ static void filter_combine__init( filter->finalize_omits_fn = filter_combine__finalize_omits; } +static void filter_auto__init( + struct list_objects_filter_options *filter_options UNUSED, + struct filter *filter UNUSED) +{ + BUG("LOFC_AUTO should have been resolved before initializing the filter"); +} + typedef void (*filter_init_fn)( struct list_objects_filter_options *filter_options, struct filter *filter); @@ -760,6 +767,7 @@ static filter_init_fn s_filters[] = { filter_sparse_oid__init, filter_object_type__init, filter_combine__init, + filter_auto__init, }; struct filter *list_objects_filter__init( diff --git a/t/meson.build b/t/meson.build index a04a7a86cfe4af..bec4c72327530a 100644 --- a/t/meson.build +++ b/t/meson.build @@ -4,6 +4,7 @@ clar_test_suites = [ 'unit-tests/u-example-decorate.c', 'unit-tests/u-hash.c', 'unit-tests/u-hashmap.c', + 'unit-tests/u-list-objects-filter-options.c', 'unit-tests/u-mem-pool.c', 'unit-tests/u-oid-array.c', 'unit-tests/u-oidmap.c', diff --git a/t/unit-tests/u-list-objects-filter-options.c b/t/unit-tests/u-list-objects-filter-options.c new file mode 100644 index 00000000000000..f7d73701b5c69e --- /dev/null +++ b/t/unit-tests/u-list-objects-filter-options.c @@ -0,0 +1,53 @@ +#include "unit-test.h" +#include "list-objects-filter-options.h" +#include "strbuf.h" + +/* Helper to test gently_parse_list_objects_filter() */ +static void check_gentle_parse(const char *filter_spec, + int expect_success, + int allow_auto, + enum list_objects_filter_choice expected_choice) +{ + struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; + struct strbuf errbuf = STRBUF_INIT; + int ret; + + filter_options.allow_auto_filter = allow_auto; + + ret = gently_parse_list_objects_filter(&filter_options, filter_spec, &errbuf); + + if (expect_success) { + cl_assert_equal_i(ret, 0); + cl_assert_equal_i(expected_choice, filter_options.choice); + cl_assert_equal_i(errbuf.len, 0); + } else { + cl_assert(ret != 0); + cl_assert(errbuf.len > 0); + } + + strbuf_release(&errbuf); + list_objects_filter_release(&filter_options); +} + +void test_list_objects_filter_options__regular_filters(void) +{ + check_gentle_parse("blob:none", 1, 0, LOFC_BLOB_NONE); + check_gentle_parse("blob:none", 1, 1, LOFC_BLOB_NONE); + check_gentle_parse("blob:limit=5k", 1, 0, LOFC_BLOB_LIMIT); + check_gentle_parse("blob:limit=5k", 1, 1, LOFC_BLOB_LIMIT); + check_gentle_parse("combine:blob:none+tree:0", 1, 0, LOFC_COMBINE); + check_gentle_parse("combine:blob:none+tree:0", 1, 1, LOFC_COMBINE); +} + +void test_list_objects_filter_options__auto_allowed(void) +{ + check_gentle_parse("auto", 1, 1, LOFC_AUTO); + check_gentle_parse("auto", 0, 0, 0); +} + +void test_list_objects_filter_options__combine_auto_fails(void) +{ + check_gentle_parse("combine:auto+blob:none", 0, 1, 0); + check_gentle_parse("combine:blob:none+auto", 0, 1, 0); + check_gentle_parse("combine:auto+auto", 0, 1, 0); +} From 257f2db5d3e6d734861890ed4f1d81607f1702fe Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:13 +0100 Subject: [PATCH 21/24] promisor-remote: keep advertised filters in memory Currently, advertised filters are only kept in memory temporarily during parsing, or persisted to disk if `promisor.storeFields` contains 'partialCloneFilter'. In a following commit though, we will add a `--filter=auto` option. This option will enable the client to use the filters that the server is suggesting for the promisor remotes the client accepts. To use them even if `promisor.storeFields` is not configured, these filters should be stored somewhere for the current session. Let's add an `advertised_filter` field to `struct promisor_remote` for that purpose. To ensure that the filters are available in all cases, filter_promisor_remote() captures them into a temporary list and applies them to the `promisor_remote` structs after the potential configuration reload. Then the accepted remotes are marked as `accepted` in the repository state. This ensures that subsequent calls to look up accepted remotes (like in the filter construction below) actually find them. In a following commit, we will add a `--filter=auto` option that will enable a client to use the filters suggested by the server for the promisor remotes the client accepted. To enable the client to construct a filter spec based on these filters, let's also add a `promisor_remote_construct_filter(repo)` function. This function: - iterates over all accepted promisor remotes in the repository, - collects the filters advertised for them (using `advertised_filter` added in this commit, and - generates a single filter spec for them. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ promisor-remote.h | 7 ++++++ 2 files changed, 65 insertions(+) diff --git a/promisor-remote.c b/promisor-remote.c index 59997dd4c78812..f3bafb7731c962 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -193,6 +193,7 @@ void promisor_remote_clear(struct promisor_remote_config *config) while (config->promisors) { struct promisor_remote *r = config->promisors; free(r->partial_clone_filter); + free(r->advertised_filter); config->promisors = config->promisors->next; free(r); } @@ -837,6 +838,7 @@ static void filter_promisor_remote(struct repository *repo, struct store_info *store_info = NULL; struct string_list_item *item; bool reload_config = false; + struct string_list accepted_filters = STRING_LIST_INIT_DUP; if (!repo_config_get_string_tmp(the_repository, "promisor.acceptfromserver", &accept_str)) { if (!*accept_str || !strcasecmp("None", accept_str)) @@ -879,6 +881,13 @@ static void filter_promisor_remote(struct repository *repo, reload_config = true; strvec_push(accepted, advertised->name); + + /* Capture advertised filters for accepted remotes */ + if (advertised->filter) { + struct string_list_item *i; + i = string_list_append(&accepted_filters, advertised->name); + i->util = xstrdup(advertised->filter); + } } promisor_info_free(advertised); @@ -890,6 +899,25 @@ static void filter_promisor_remote(struct repository *repo, if (reload_config) repo_promisor_remote_reinit(repo); + + /* Apply accepted remote filters to the stable repo state */ + for_each_string_list_item(item, &accepted_filters) { + struct promisor_remote *r = repo_promisor_remote_find(repo, item->string); + if (r) { + free(r->advertised_filter); + r->advertised_filter = item->util; + item->util = NULL; + } + } + + string_list_clear(&accepted_filters, 1); + + /* Mark the remotes as accepted in the repository state */ + for (size_t i = 0; i < accepted->nr; i++) { + struct promisor_remote *r = repo_promisor_remote_find(repo, accepted->v[i]); + if (r) + r->accepted = 1; + } } char *promisor_remote_reply(const char *info) @@ -935,3 +963,33 @@ void mark_promisor_remotes_as_accepted(struct repository *r, const char *remotes string_list_clear(&accepted_remotes, 0); } + +char *promisor_remote_construct_filter(struct repository *repo) +{ + struct promisor_remote *r; + struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; + struct strbuf err = STRBUF_INIT; + char *result = NULL; + + promisor_remote_init(repo); + + for (r = repo->promisor_remote_config->promisors; r; r = r->next) { + if (r->accepted && r->advertised_filter) + if (gently_parse_list_objects_filter(&filter_options, + r->advertised_filter, + &err)) { + warning(_("promisor remote '%s' advertised invalid filter '%s': %s"), + r->name, r->advertised_filter, err.buf); + strbuf_reset(&err); + continue; + } + } + + if (filter_options.choice) + result = xstrdup(expand_list_objects_filter_spec(&filter_options)); + + list_objects_filter_release(&filter_options); + strbuf_release(&err); + + return result; +} diff --git a/promisor-remote.h b/promisor-remote.h index 263d331a551b6c..d227299fd0d276 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -15,6 +15,7 @@ struct object_id; struct promisor_remote { struct promisor_remote *next; char *partial_clone_filter; + char *advertised_filter; unsigned int accepted : 1; const char name[FLEX_ARRAY]; }; @@ -67,4 +68,10 @@ void mark_promisor_remotes_as_accepted(struct repository *repo, const char *remo */ int repo_has_accepted_promisor_remote(struct repository *r); +/* + * Use the filters from the accepted remotes to create a combined + * filter (useful in `--filter=auto` mode). + */ +char *promisor_remote_construct_filter(struct repository *repo); + #endif /* PROMISOR_REMOTE_H */ From e15a6b2f8b9f62206e9d0b2a57a81da19cdcf3de Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:14 +0100 Subject: [PATCH 22/24] promisor-remote: change promisor_remote_reply()'s signature The `promisor_remote_reply()` function performs two tasks: 1. It uses filter_promisor_remote() to parse the server's "promisor-remote" advertisement and to mark accepted remotes in the repository configuration. 2. It assembles a reply string containing the accepted remote names to send back to the server. In a following commit, the fetch-pack logic will need to trigger the side effect (1) to ensure the repository state is correct, but it will not need to send a reply (2). To avoid assembling a reply string when it is not needed, let's change the signature of promisor_remote_reply(). It will now return `void` and accept a second `char **accepted_out` argument. Only if that argument is not NULL will a reply string be assembled and returned back to the caller via that argument. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- connect.c | 3 ++- promisor-remote.c | 24 +++++++++++++----------- promisor-remote.h | 10 +++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/connect.c b/connect.c index c6f76e30829ff2..a02583a1027241 100644 --- a/connect.c +++ b/connect.c @@ -505,7 +505,8 @@ static void send_capabilities(int fd_out, struct packet_reader *reader) reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; } if (server_feature_v2("promisor-remote", &promisor_remote_info)) { - char *reply = promisor_remote_reply(promisor_remote_info); + char *reply; + promisor_remote_reply(promisor_remote_info, &reply); if (reply) { packet_write_fmt(fd_out, "promisor-remote=%s", reply); free(reply); diff --git a/promisor-remote.c b/promisor-remote.c index f3bafb7731c962..96fa215b06a924 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -920,25 +920,27 @@ static void filter_promisor_remote(struct repository *repo, } } -char *promisor_remote_reply(const char *info) +void promisor_remote_reply(const char *info, char **accepted_out) { struct strvec accepted = STRVEC_INIT; - struct strbuf reply = STRBUF_INIT; filter_promisor_remote(the_repository, &accepted, info); - if (!accepted.nr) - return NULL; - - for (size_t i = 0; i < accepted.nr; i++) { - if (i) - strbuf_addch(&reply, ';'); - strbuf_addstr_urlencode(&reply, accepted.v[i], allow_unsanitized); + if (accepted_out) { + if (accepted.nr) { + struct strbuf reply = STRBUF_INIT; + for (size_t i = 0; i < accepted.nr; i++) { + if (i) + strbuf_addch(&reply, ';'); + strbuf_addstr_urlencode(&reply, accepted.v[i], allow_unsanitized); + } + *accepted_out = strbuf_detach(&reply, NULL); + } else { + *accepted_out = NULL; + } } strvec_clear(&accepted); - - return strbuf_detach(&reply, NULL); } void mark_promisor_remotes_as_accepted(struct repository *r, const char *remotes) diff --git a/promisor-remote.h b/promisor-remote.h index d227299fd0d276..3d4d2de01818ea 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -49,12 +49,12 @@ char *promisor_remote_info(struct repository *repo); /* * Prepare a reply to a "promisor-remote" advertisement from a server. * Check the value of "promisor.acceptfromserver" and maybe the - * configured promisor remotes, if any, to prepare the reply. - * Return value is NULL if no promisor remote from the server - * is accepted. Otherwise it contains the names of the accepted promisor - * remotes separated by ';'. See gitprotocol-v2(5). + * configured promisor remotes, if any, to prepare the reply. If the + * `accepted_out` argument is not NULL, it is set to either NULL or to + * the names of the accepted promisor remotes separated by ';' if + * any. See gitprotocol-v2(5). */ -char *promisor_remote_reply(const char *info); +void promisor_remote_reply(const char *info, char **accepted_out); /* * Set the 'accepted' flag for some promisor remotes. Useful on the From ef2f1845ec4b683df791bfd956f551b096a38009 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 16 Feb 2026 14:23:15 +0100 Subject: [PATCH 23/24] fetch-pack: wire up and enable auto filter logic Previous commits have set up an infrastructure for `--filter=auto` to automatically prepare a partial clone filter based on what the server advertised and the client accepted. Using that infrastructure, let's now enable the `--filter=auto` option in `git clone` and `git fetch` by setting `allow_auto_filter` to 1. Note that these small changes mean that when `git clone --filter=auto` or `git fetch --filter=auto` are used, "auto" is automatically saved as the partial clone filter for the server on the client. Therefore subsequent calls to `git fetch` on the client will automatically use this "auto" mode even without `--filter=auto`. Let's also set `allow_auto_filter` to 1 in `transport.c`, as the transport layer must be able to accept the "auto" filter spec even if the invoking command hasn't fully parsed it yet. When an "auto" filter is requested, let's have the "fetch-pack.c" code in `do_fetch_pack_v2()` compute a filter and send it to the server. In `do_fetch_pack_v2()` the logic also needs to check for the "promisor-remote" capability and call `promisor_remote_reply()` to parse advertised remotes and populate the list of those accepted (and their filters). Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/fetch-options.adoc | 19 ++++++--- Documentation/git-clone.adoc | 25 ++++++++--- Documentation/gitprotocol-v2.adoc | 16 ++++--- builtin/clone.c | 2 + builtin/fetch.c | 2 + fetch-pack.c | 24 +++++++++++ t/t5710-promisor-remote-capability.sh | 60 +++++++++++++++++++++++++++ transport.c | 1 + 8 files changed, 134 insertions(+), 15 deletions(-) diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index 1ef9807d00b55a..a0cfb50d89e2ef 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -92,11 +92,20 @@ precedence over the `fetch.output` config option. Use the partial clone feature and request that the server sends a subset of reachable objects according to a given object filter. When using `--filter`, the supplied __ is used for - the partial fetch. For example, `--filter=blob:none` will filter - out all blobs (file contents) until needed by Git. Also, - `--filter=blob:limit=` will filter out all blobs of size - at least __. For more details on filter specifications, see - the `--filter` option in linkgit:git-rev-list[1]. + the partial fetch. ++ +If `--filter=auto` is used, the filter specification is determined +automatically by combining the filter specifications advertised by +the server for the promisor remotes that the client accepts (see +linkgit:gitprotocol-v2[5] and the `promisor.acceptFromServer` +configuration option in linkgit:git-config[1]). ++ +For details on all other available filter specifications, see the +`--filter=` option in linkgit:git-rev-list[1]. ++ +For example, `--filter=blob:none` will filter out all blobs (file +contents) until needed by Git. Also, `--filter=blob:limit=` will +filter out all blobs of size at least __. ifndef::git-pull[] `--write-fetch-head`:: diff --git a/Documentation/git-clone.adoc b/Documentation/git-clone.adoc index 57cdfb7620571c..0db2d1e5f0465b 100644 --- a/Documentation/git-clone.adoc +++ b/Documentation/git-clone.adoc @@ -187,11 +187,26 @@ objects from the source repository into a pack in the cloned repository. Use the partial clone feature and request that the server sends a subset of reachable objects according to a given object filter. When using `--filter`, the supplied __ is used for - the partial clone filter. For example, `--filter=blob:none` will - filter out all blobs (file contents) until needed by Git. Also, - `--filter=blob:limit=` will filter out all blobs of size - at least __. For more details on filter specifications, see - the `--filter` option in linkgit:git-rev-list[1]. + the partial clone filter. ++ +If `--filter=auto` is used the filter specification is determined +automatically through the 'promisor-remote' protocol (see +linkgit:gitprotocol-v2[5]) by combining the filter specifications +advertised by the server for the promisor remotes that the client +accepts (see the `promisor.acceptFromServer` configuration option in +linkgit:git-config[1]). This allows the server to suggest the optimal +filter for the available promisor remotes. ++ +As with other filter specifications, the "auto" value is persisted in +the configuration. This ensures that future fetches will continue to +adapt to the server's current recommendation. ++ +For details on all other available filter specifications, see the +`--filter=` option in linkgit:git-rev-list[1]. ++ +For example, `--filter=blob:none` will filter out all blobs (file +contents) until needed by Git. Also, `--filter=blob:limit=` will +filter out all blobs of size at least __. `--also-filter-submodules`:: Also apply the partial clone filter to any submodules in the repository. diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index d93dd279ead7b2..f985cb4c474953 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -812,10 +812,15 @@ MUST appear first in each pr-fields, in that order. After these mandatory fields, the server MAY advertise the following optional fields in any order: -`partialCloneFilter`:: The filter specification used by the remote. +`partialCloneFilter`:: The filter specification for the remote. It +corresponds to the "remote..partialCloneFilter" config setting. Clients can use this to determine if the remote's filtering strategy -is compatible with their needs (e.g., checking if both use "blob:none"). -It corresponds to the "remote..partialCloneFilter" config setting. +is compatible with their needs (e.g., checking if both use +"blob:none"). Additionally they can use this through the +`--filter=auto` option in linkgit:git-clone[1]. With that option, the +filter specification of the clone will be automatically computed by +combining the filter specifications of the promisor remotes the client +accepts. `token`:: An authentication token that clients can use when connecting to the remote. It corresponds to the "remote..token" @@ -828,8 +833,9 @@ future protocol extensions. The client can use information transmitted through these fields to decide if it accepts the advertised promisor remote. Also, the client -can be configured to store the values of these fields (see -"promisor.storeFields" in linkgit:git-config[1]). +can be configured to store the values of these fields or use them +to automatically configure the repository (see "promisor.storeFields" +in linkgit:git-config[1] and `--filter=auto` in linkgit:git-clone[1]). Field values MUST be urlencoded. diff --git a/builtin/clone.c b/builtin/clone.c index bb27472020dc46..45d8fa0eed78c4 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1001,6 +1001,8 @@ int cmd_clone(int argc, NULL }; + filter_options.allow_auto_filter = 1; + packet_trace_identity("clone"); repo_config(the_repository, git_clone_config, NULL); diff --git a/builtin/fetch.c b/builtin/fetch.c index 8fbf3557ceba3b..573c2952415bc2 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -2580,6 +2580,8 @@ int cmd_fetch(int argc, OPT_END() }; + filter_options.allow_auto_filter = 1; + packet_trace_identity("fetch"); /* Record the command line for the reflog */ diff --git a/fetch-pack.c b/fetch-pack.c index 40316c9a348f23..9f8f980516d790 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -35,6 +35,7 @@ #include "sigchain.h" #include "mergesort.h" #include "prio-queue.h" +#include "promisor-remote.h" static int transfer_unpack_limit = -1; static int fetch_unpack_limit = -1; @@ -1661,6 +1662,29 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct string_list packfile_uris = STRING_LIST_INIT_DUP; int i; struct strvec index_pack_args = STRVEC_INIT; + const char *promisor_remote_config; + + if (server_feature_v2("promisor-remote", &promisor_remote_config)) + promisor_remote_reply(promisor_remote_config, NULL); + + if (args->filter_options.choice == LOFC_AUTO) { + struct strbuf errbuf = STRBUF_INIT; + char *constructed_filter = promisor_remote_construct_filter(r); + + list_objects_filter_release(&args->filter_options); + /* Disallow 'auto' as a result of the resolution of this 'auto' filter below */ + args->filter_options.allow_auto_filter = 0; + + if (constructed_filter && + gently_parse_list_objects_filter(&args->filter_options, + constructed_filter, + &errbuf)) + die(_("couldn't resolve 'auto' filter '%s': %s"), + constructed_filter, errbuf.buf); + + free(constructed_filter); + strbuf_release(&errbuf); + } negotiator = &negotiator_alloc; if (args->refetch) diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index 6ef6431bd7dd25..532e6f0fea125b 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -423,6 +423,66 @@ test_expect_success "clone with promisor.storeFields=partialCloneFilter" ' test_grep "'\''blob:limit=8k'\'' -> '\''blob:limit=7k'\''" err ' +test_expect_success "clone and fetch with --filter=auto" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client trace" && + + git -C server config remote.lop.partialCloneFilter "blob:limit=9500" && + test_config -C server promisor.sendFields "partialCloneFilter" && + + GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ + -c remote.lop.promisor=true \ + -c remote.lop.url="file://$(pwd)/lop" \ + -c promisor.acceptfromserver=All \ + --no-local --filter=auto server client 2>err && + + test_grep "filter blob:limit=9500" trace && + test_grep ! "filter auto" trace && + + # Verify "auto" is persisted in config + echo auto >expected && + git -C client config remote.origin.partialCloneFilter >actual && + test_cmp expected actual && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" && + + # Now change the filter on the server + git -C server config remote.lop.partialCloneFilter "blob:limit=5678" && + + # Get a new commit on the server to ensure "git fetch" actually runs fetch-pack + test_commit -C template new-commit && + git -C template push --all "$(pwd)/server" && + + # Perform a fetch WITH --filter=auto + rm -rf trace && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch --filter=auto && + + # Verify that the new filter was used + test_grep "filter blob:limit=5678" trace && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" && + + # Change the filter on the server again + git -C server config remote.lop.partialCloneFilter "blob:limit=5432" && + + # Get yet a new commit on the server to ensure fetch-pack runs + test_commit -C template yet-a-new-commit && + git -C template push --all "$(pwd)/server" && + + # Perform a fetch WITHOUT --filter=auto + # Relies on "auto" being persisted in the client config + rm -rf trace && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch && + + # Verify that the new filter was used + test_grep "filter blob:limit=5432" trace && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + test_expect_success "clone with promisor.advertise set to 'true' but don't delete the client" ' git -C server config promisor.advertise true && diff --git a/transport.c b/transport.c index c7f06a7382e605..cde8d83a575a76 100644 --- a/transport.c +++ b/transport.c @@ -1219,6 +1219,7 @@ struct transport *transport_get(struct remote *remote, const char *url) */ struct git_transport_data *data = xcalloc(1, sizeof(*data)); list_objects_filter_init(&data->options.filter_options); + data->options.filter_options.allow_auto_filter = 1; ret->data = data; ret->vtable = &builtin_smart_vtable; ret->smart_options = &(data->options); From 7b2bccb0d58d4f24705bf985de1f4612e4cf06e5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 25 Feb 2026 10:49:04 -0800 Subject: [PATCH 24/24] The 7th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 5801d8a39ba850..9988979772019c 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -24,6 +24,14 @@ UI, Workflows & Features all configuration, even the per-user ones. The command now uses available configuration files to find its customization. + * "auto filter" logic for large-object promisor remote. + + * "git rev-list" and friends learn "--maximal-only" to show only the + commits that are not reachable by other commits. + + * Command line completion (in contrib/) update for + "stash import/export". + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -62,6 +70,12 @@ Performance, Internal Implementation, Development Support etc. * "git diff --anchored=" has been optimized. + * A CodingGuidelines update. + + * Add process ancestry data to trace2 on macOS to match what we + already do on Linux and Windows. Also adjust the way Windows + implementation reports this information to match the other two. + Fixes since v2.53 ----------------- @@ -107,6 +121,15 @@ Fixes since v2.53 duplication. (merge 5086213bd2 pw/xdiff-cleanups later to maint). + * Update sample commit-msg hook to complain when a log message has + material mailinfo considers the end of log message in the middle. + (merge 83804c361b pw/commit-msg-sample-hook later to maint). + + * "git pack-objects --stdin-packs" with "--exclude-promisor-objects" + fetched objects that are promised, which was not wanted. This has + been fixed. + (merge f4eff7116d ps/pack-concat-wo-backfill later to maint). + * Other code cleanup, docfix, build fix, etc. (merge d79fff4a11 jk/remote-tracking-ref-leakfix later to maint). (merge 7a747f972d dd/t5403-modernise later to maint). @@ -127,3 +150,4 @@ Fixes since v2.53 (merge aaf3cc3d8d sd/t7003-test-path-is-helpers later to maint). (merge 2668b6bdc4 jc/doc-rerere-update later to maint). (merge 2f99f50f2d jc/doc-cg-c-comment later to maint). + (merge a454cdca42 kh/doc-am-format-sendmail later to maint).