Merge branch 'kk/streaming-walk-pqueue' into jch

Streaming revision walks have been optimized by using a priority queue
for date-sorting commits, speeding up walks in repositories with many
merges.

* kk/streaming-walk-pqueue:
  revision: use priority queue for non-limited streaming walks
  revision: introduce rev_walk_mode to clarify get_revision_1()
  pack-objects: call release_revisions() after cruft traversal
This commit is contained in:
Junio C Hamano
2026-06-13 09:22:00 -07:00
5 changed files with 88 additions and 53 deletions

View File

@@ -4281,6 +4281,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs
traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
stop_progress(&progress_state);
release_revisions(&revs);
}
static void read_cruft_objects(void)

View File

@@ -760,19 +760,6 @@ void commit_list_free(struct commit_list *list)
pop_commit(&list);
}
/*
 * Insert `item` into `*list` in front of the first entry whose commit
 * date is strictly older than item's date, or at the tail if there is
 * no such entry.  Entries with a date equal to item's are skipped over,
 * so `item` lands after them.
 *
 * The scan is linear from the head of the list on every call.
 *
 * Returns whatever commit_list_insert() returns for the insertion.
 */
struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
{
struct commit_list **pp = list;
struct commit_list *p;
/* Advance pp past every entry that is not older than item. */
while ((p = *pp) != NULL) {
if (p->item->date < item->date) {
break;
}
pp = &p->next;
}
/* pp now addresses the link where item belongs. */
return commit_list_insert(item, pp);
}
static int commit_list_compare_by_date(const struct commit_list *a,
const struct commit_list *b)
{

View File

@@ -191,8 +191,6 @@ int commit_list_contains(struct commit *item,
struct commit_list **commit_list_append(struct commit *commit,
struct commit_list **next);
unsigned commit_list_count(const struct commit_list *l);
struct commit_list *commit_list_insert_by_date(struct commit *item,
struct commit_list **list);
void commit_list_sort_by_date(struct commit_list **list);
/* Shallow copy of the input list */

View File

@@ -1116,7 +1116,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
}
static int process_parents(struct rev_info *revs, struct commit *commit,
struct commit_list **list, struct prio_queue *queue)
struct prio_queue *queue)
{
struct commit_list *parent = commit->parents;
unsigned pass_flags;
@@ -1158,8 +1158,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
if (p->object.flags & SEEN)
continue;
p->object.flags |= (SEEN | NOT_USER_GIVEN);
if (list)
commit_list_insert_by_date(p, list);
if (queue)
prio_queue_put(queue, p);
if (revs->exclude_first_parent_only)
@@ -1207,8 +1205,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
p->object.flags |= pass_flags | CHILD_VISITED;
if (!(p->object.flags & SEEN)) {
p->object.flags |= (SEEN | NOT_USER_GIVEN);
if (list)
commit_list_insert_by_date(p, list);
if (queue)
prio_queue_put(queue, p);
}
@@ -1470,7 +1466,7 @@ static int limit_list(struct rev_info *revs)
if (revs->max_age != -1 && (commit->date < revs->max_age))
obj->flags |= UNINTERESTING;
if (process_parents(revs, commit, NULL, &queue) < 0) {
if (process_parents(revs, commit, &queue) < 0) {
clear_prio_queue(&queue);
return -1;
}
@@ -3286,6 +3282,7 @@ static void free_void_commit_list(void *list)
void release_revisions(struct rev_info *revs)
{
commit_list_free(revs->commits);
clear_prio_queue(&revs->commit_queue);
commit_list_free(revs->ancestry_path_bottoms);
release_display_notes(&revs->notes_opt);
object_array_clear(&revs->pending);
@@ -3755,7 +3752,7 @@ static void explore_walk_step(struct rev_info *revs)
if (revs->max_age != -1 && (c->date < revs->max_age))
c->object.flags |= UNINTERESTING;
if (process_parents(revs, c, NULL, NULL) < 0)
if (process_parents(revs, c, NULL) < 0)
return;
if (c->object.flags & UNINTERESTING)
@@ -3931,7 +3928,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
{
struct commit_list *p;
struct topo_walk_info *info = revs->topo_walk_info;
if (process_parents(revs, commit, NULL, NULL) < 0) {
if (process_parents(revs, commit, NULL) < 0) {
if (!revs->ignore_missing_links)
die("Failed to traverse parents of commit %s",
oid_to_hex(&commit->object.oid));
@@ -3967,6 +3964,13 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
}
}
/*
 * Move every commit from the revs->commits linked list into
 * revs->commit_queue.  Commits are popped from the list one at a time
 * and put on the queue; the loop only ends once revs->commits is NULL,
 * so the list is empty on return.
 */
void rev_info_commit_list_to_queue(struct rev_info *revs)
{
while (revs->commits)
prio_queue_put(&revs->commit_queue, pop_commit(&revs->commits));
}
int prepare_revision_walk(struct rev_info *revs)
{
int i;
@@ -4035,7 +4039,7 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
for (;;) {
struct commit *p = *pp;
if (!revs->limited)
if (process_parents(revs, p, NULL, queue) < 0)
if (process_parents(revs, p, queue) < 0)
return rewrite_one_error;
if (p->object.flags & UNINTERESTING)
return rewrite_one_ok;
@@ -4049,27 +4053,18 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
}
}
static void merge_queue_into_list(struct prio_queue *q, struct commit_list **list)
static void merge_queue_into_prio_queue(struct prio_queue *from,
struct prio_queue *to)
{
while (q->nr) {
struct commit *item = prio_queue_peek(q);
struct commit_list *p = *list;
if (p && p->item->date >= item->date)
list = &p->next;
else {
p = commit_list_insert(item, list);
list = &p->next; /* skip newly added item */
prio_queue_get(q); /* pop item */
}
}
while (from->nr)
prio_queue_put(to, prio_queue_get(from));
}
static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp)
{
struct prio_queue queue = { compare_commits_by_commit_date };
enum rewrite_result ret = rewrite_one_1(revs, pp, &queue);
merge_queue_into_list(&queue, &revs->commits);
merge_queue_into_prio_queue(&queue, &revs->commit_queue);
clear_prio_queue(&queue);
return ret;
}
@@ -4356,22 +4351,57 @@ static void track_linear(struct rev_info *revs, struct commit *commit)
revs->previous_parents = commit_list_copy(commit->parents);
}
/*
 * The kind of walk a rev_info is set up for, as classified by
 * get_walk_mode().
 */
enum rev_walk_mode {
/* revs->reflog_info is set. */
REV_WALK_REFLOG,
/* revs->topo_walk_info is set. */
REV_WALK_TOPO,
/* revs->limited is set. */
REV_WALK_LIMITED,
/* revs->no_walk is set. */
REV_WALK_NO_WALK,
/* None of the above conditions hold. */
REV_WALK_STREAMING,
};
/*
 * Classify `revs` into a rev_walk_mode.
 *
 * The checks are ordered, and the first one that matches wins:
 * reflog_info, then topo_walk_info, then limited, then no_walk.
 * If none of those fields is set, the walk is REV_WALK_STREAMING.
 */
static enum rev_walk_mode get_walk_mode(struct rev_info *revs)
{
if (revs->reflog_info)
return REV_WALK_REFLOG;
if (revs->topo_walk_info)
return REV_WALK_TOPO;
if (revs->limited)
return REV_WALK_LIMITED;
if (revs->no_walk)
return REV_WALK_NO_WALK;
/* Fallback when no other mode applies. */
return REV_WALK_STREAMING;
}
static struct commit *get_revision_1(struct rev_info *revs)
{
enum rev_walk_mode mode = get_walk_mode(revs);
if (mode == REV_WALK_STREAMING && revs->commits)
rev_info_commit_list_to_queue(revs);
while (1) {
struct commit *commit;
if (revs->reflog_info)
switch (mode) {
case REV_WALK_REFLOG:
commit = next_reflog_entry(revs->reflog_info);
else if (revs->topo_walk_info)
break;
case REV_WALK_TOPO:
commit = next_topo_commit(revs);
else
break;
case REV_WALK_LIMITED:
case REV_WALK_NO_WALK:
commit = pop_commit(&revs->commits);
break;
case REV_WALK_STREAMING:
commit = prio_queue_get(&revs->commit_queue);
break;
}
if (!commit)
return NULL;
if (revs->reflog_info)
if (mode == REV_WALK_REFLOG)
commit->object.flags &= ~(ADDED | SEEN | SHOWN);
/*
@@ -4379,20 +4409,29 @@ static struct commit *get_revision_1(struct rev_info *revs)
* the parents here. We also need to do the date-based limiting
* that we'd otherwise have done in limit_list().
*/
if (!revs->limited) {
if (revs->max_age != -1 &&
comparison_date(revs, commit) < revs->max_age)
continue;
if (mode != REV_WALK_LIMITED &&
revs->max_age != -1 &&
comparison_date(revs, commit) < revs->max_age)
continue;
if (revs->reflog_info)
try_to_simplify_commit(revs, commit);
else if (revs->topo_walk_info)
expand_topo_walk(revs, commit);
else if (process_parents(revs, commit, &revs->commits, NULL) < 0) {
switch (mode) {
case REV_WALK_REFLOG:
try_to_simplify_commit(revs, commit);
break;
case REV_WALK_TOPO:
expand_topo_walk(revs, commit);
break;
case REV_WALK_STREAMING:
if (process_parents(revs, commit,
&revs->commit_queue) < 0) {
if (!revs->ignore_missing_links)
die("Failed to traverse parents of commit %s",
oid_to_hex(&commit->object.oid));
oid_to_hex(&commit->object.oid));
}
break;
case REV_WALK_NO_WALK:
case REV_WALK_LIMITED:
break;
}
switch (simplify_commit(revs, commit)) {

View File

@@ -12,6 +12,7 @@
#include "decorate.h"
#include "ident.h"
#include "list-objects-filter-options.h"
#include "prio-queue.h"
#include "strvec.h"
/**
@@ -122,8 +123,14 @@ struct oidset;
struct topo_walk_info;
struct rev_info {
/* Starting list */
/*
* Work queue of commits, stored as either a linked list or a
* priority queue, but never both at the same time.
* rev_info_commit_list_to_queue() converts list to queue.
*/
struct commit_list *commits;
struct prio_queue commit_queue;
struct object_array pending;
struct repository *repo;
@@ -403,6 +410,7 @@ struct rev_info {
* uninitialized.
*/
#define REV_INFO_INIT { \
.commit_queue = { .compare = compare_commits_by_commit_date }, \
.abbrev = DEFAULT_ABBREV, \
.simplify_history = 1, \
.pruning.flags.recursive = 1, \
@@ -481,6 +489,8 @@ void reset_revision_walk(void);
*/
int prepare_revision_walk(struct rev_info *revs);
/* Drain the commits linked list into the priority queue. */
void rev_info_commit_list_to_queue(struct rev_info *revs);
/**
* Takes a pointer to a `rev_info` structure and iterates over it, returning a
* `struct commit *` each time you call it. The end of the revision list is