Skip to content

Commit cb02b38

Browse files
committed
revision: use priority queue for non-limited streaming walks
Replace the O(N) sorted linked-list insertion used by get_revision_1() for non-limited walks with an O(log N) priority queue. Add a commit_queue field to rev_info alongside the existing commits linked list. The two representations are mutually exclusive: setup and external callers that need list access (mark_edges_uninteresting, bisect, etc.) use the linked list, then get_revision_1() lazily drains it into the priority queue on the first call for non-limited, non-no_walk, non-topo, non-reflog walks. The conversion function rev_info_commit_list_to_queue() is public so callers that know they will iterate can convert early. The parent-rewriting path (rewrite_one) always merges newly discovered commits into the priority queue, which is correct because rewrite_one_1 only calls process_parents for non-limited walks where the queue is active. Combined with the limit_list() priority queue change in the parent commit, this eliminates all O(N) sorted linked-list insertion from the revision walk machinery. Performance: git rev-list HEAD on a 2.3M-commit repository improves from ~21s to ~6.4s (3.3x speedup). Signed-off-by: Kristoffer Gleditsch <krka@spotify.com>
1 parent ce64d4c commit cb02b38

4 files changed

Lines changed: 38 additions & 41 deletions

File tree

commit.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -729,19 +729,6 @@ void commit_list_free(struct commit_list *list)
729729
pop_commit(&list);
730730
}
731731

732-
struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
733-
{
734-
struct commit_list **pp = list;
735-
struct commit_list *p;
736-
while ((p = *pp) != NULL) {
737-
if (p->item->date < item->date) {
738-
break;
739-
}
740-
pp = &p->next;
741-
}
742-
return commit_list_insert(item, pp);
743-
}
744-
745732
static int commit_list_compare_by_date(const struct commit_list *a,
746733
const struct commit_list *b)
747734
{

commit.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,6 @@ int commit_list_contains(struct commit *item,
191191
struct commit_list **commit_list_append(struct commit *commit,
192192
struct commit_list **next);
193193
unsigned commit_list_count(const struct commit_list *l);
194-
struct commit_list *commit_list_insert_by_date(struct commit *item,
195-
struct commit_list **list);
196194
void commit_list_sort_by_date(struct commit_list **list);
197195

198196
/* Shallow copy of the input list */

revision.c

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,7 +1116,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
11161116
}
11171117

11181118
static int process_parents(struct rev_info *revs, struct commit *commit,
1119-
struct commit_list **list, struct prio_queue *queue)
1119+
struct prio_queue *queue)
11201120
{
11211121
struct commit_list *parent = commit->parents;
11221122
unsigned pass_flags;
@@ -1158,8 +1158,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
11581158
if (p->object.flags & SEEN)
11591159
continue;
11601160
p->object.flags |= (SEEN | NOT_USER_GIVEN);
1161-
if (list)
1162-
commit_list_insert_by_date(p, list);
11631161
if (queue)
11641162
prio_queue_put(queue, p);
11651163
if (revs->exclude_first_parent_only)
@@ -1207,8 +1205,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
12071205
p->object.flags |= pass_flags | CHILD_VISITED;
12081206
if (!(p->object.flags & SEEN)) {
12091207
p->object.flags |= (SEEN | NOT_USER_GIVEN);
1210-
if (list)
1211-
commit_list_insert_by_date(p, list);
12121208
if (queue)
12131209
prio_queue_put(queue, p);
12141210
}
@@ -1470,7 +1466,7 @@ static int limit_list(struct rev_info *revs)
14701466

14711467
if (revs->max_age != -1 && (commit->date < revs->max_age))
14721468
obj->flags |= UNINTERESTING;
1473-
if (process_parents(revs, commit, NULL, &queue) < 0) {
1469+
if (process_parents(revs, commit, &queue) < 0) {
14741470
clear_prio_queue(&queue);
14751471
return -1;
14761472
}
@@ -3257,6 +3253,7 @@ static void free_void_commit_list(void *list)
32573253
void release_revisions(struct rev_info *revs)
32583254
{
32593255
commit_list_free(revs->commits);
3256+
clear_prio_queue(&revs->commit_queue);
32603257
commit_list_free(revs->ancestry_path_bottoms);
32613258
release_display_notes(&revs->notes_opt);
32623259
object_array_clear(&revs->pending);
@@ -3726,7 +3723,7 @@ static void explore_walk_step(struct rev_info *revs)
37263723
if (revs->max_age != -1 && (c->date < revs->max_age))
37273724
c->object.flags |= UNINTERESTING;
37283725

3729-
if (process_parents(revs, c, NULL, NULL) < 0)
3726+
if (process_parents(revs, c, NULL) < 0)
37303727
return;
37313728

37323729
if (c->object.flags & UNINTERESTING)
@@ -3902,7 +3899,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
39023899
{
39033900
struct commit_list *p;
39043901
struct topo_walk_info *info = revs->topo_walk_info;
3905-
if (process_parents(revs, commit, NULL, NULL) < 0) {
3902+
if (process_parents(revs, commit, NULL) < 0) {
39063903
if (!revs->ignore_missing_links)
39073904
die("Failed to traverse parents of commit %s",
39083905
oid_to_hex(&commit->object.oid));
@@ -3938,6 +3935,13 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
39383935
}
39393936
}
39403937

3938+
void rev_info_commit_list_to_queue(struct rev_info *revs)
3939+
{
3940+
while (revs->commits)
3941+
prio_queue_put(&revs->commit_queue, pop_commit(&revs->commits));
3942+
}
3943+
3944+
39413945
int prepare_revision_walk(struct rev_info *revs)
39423946
{
39433947
int i;
@@ -4006,7 +4010,7 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
40064010
for (;;) {
40074011
struct commit *p = *pp;
40084012
if (!revs->limited)
4009-
if (process_parents(revs, p, NULL, queue) < 0)
4013+
if (process_parents(revs, p, queue) < 0)
40104014
return rewrite_one_error;
40114015
if (p->object.flags & UNINTERESTING)
40124016
return rewrite_one_ok;
@@ -4020,27 +4024,18 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
40204024
}
40214025
}
40224026

4023-
static void merge_queue_into_list(struct prio_queue *q, struct commit_list **list)
4027+
static void merge_queue_into_prio_queue(struct prio_queue *from,
4028+
struct prio_queue *to)
40244029
{
4025-
while (q->nr) {
4026-
struct commit *item = prio_queue_peek(q);
4027-
struct commit_list *p = *list;
4028-
4029-
if (p && p->item->date >= item->date)
4030-
list = &p->next;
4031-
else {
4032-
p = commit_list_insert(item, list);
4033-
list = &p->next; /* skip newly added item */
4034-
prio_queue_get(q); /* pop item */
4035-
}
4036-
}
4030+
while (from->nr)
4031+
prio_queue_put(to, prio_queue_get(from));
40374032
}
40384033

40394034
static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp)
40404035
{
40414036
struct prio_queue queue = { compare_commits_by_commit_date };
40424037
enum rewrite_result ret = rewrite_one_1(revs, pp, &queue);
4043-
merge_queue_into_list(&queue, &revs->commits);
4038+
merge_queue_into_prio_queue(&queue, &revs->commit_queue);
40444039
clear_prio_queue(&queue);
40454040
return ret;
40464041
}
@@ -4336,8 +4331,13 @@ static struct commit *get_revision_1(struct rev_info *revs)
43364331
commit = next_reflog_entry(revs->reflog_info);
43374332
else if (revs->topo_walk_info)
43384333
commit = next_topo_commit(revs);
4339-
else
4334+
else if (revs->limited || revs->no_walk)
43404335
commit = pop_commit(&revs->commits);
4336+
else {
4337+
if (!revs->commit_queue.nr && revs->commits)
4338+
rev_info_commit_list_to_queue(revs);
4339+
commit = prio_queue_get(&revs->commit_queue);
4340+
}
43414341

43424342
if (!commit)
43434343
return NULL;
@@ -4359,7 +4359,9 @@ static struct commit *get_revision_1(struct rev_info *revs)
43594359
try_to_simplify_commit(revs, commit);
43604360
else if (revs->topo_walk_info)
43614361
expand_topo_walk(revs, commit);
4362-
else if (process_parents(revs, commit, &revs->commits, NULL) < 0) {
4362+
else if (process_parents(revs, commit,
4363+
revs->no_walk ?
4364+
NULL : &revs->commit_queue) < 0) {
43634365
if (!revs->ignore_missing_links)
43644366
die("Failed to traverse parents of commit %s",
43654367
oid_to_hex(&commit->object.oid));

revision.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "decorate.h"
1313
#include "ident.h"
1414
#include "list-objects-filter-options.h"
15+
#include "prio-queue.h"
1516
#include "strvec.h"
1617

1718
/**
@@ -122,8 +123,14 @@ struct oidset;
122123
struct topo_walk_info;
123124

124125
struct rev_info {
125-
/* Starting list */
126+
/*
127+
* Work queue of commits, stored as either a linked list or a
128+
* priority queue, but never both at the same time.
129+
* rev_info_commit_list_to_queue() converts list to queue.
130+
*/
126131
struct commit_list *commits;
132+
struct prio_queue commit_queue;
133+
127134
struct object_array pending;
128135
struct repository *repo;
129136

@@ -400,6 +407,7 @@ struct rev_info {
400407
* uninitialized.
401408
*/
402409
#define REV_INFO_INIT { \
410+
.commit_queue = { .compare = compare_commits_by_commit_date }, \
403411
.abbrev = DEFAULT_ABBREV, \
404412
.simplify_history = 1, \
405413
.pruning.flags.recursive = 1, \
@@ -478,6 +486,8 @@ void reset_revision_walk(void);
478486
*/
479487
int prepare_revision_walk(struct rev_info *revs);
480488

489+
/* Drain the commits linked list into the priority queue. */
490+
void rev_info_commit_list_to_queue(struct rev_info *revs);
481491
/**
482492
* Takes a pointer to a `rev_info` structure and iterates over it, returning a
483493
* `struct commit *` each time you call it. The end of the revision list is

0 commit comments

Comments
 (0)