Skip to content

Commit 5fa4e71

Browse files
feat(backend): write changed-path Bloom filters to commit-graph
Writes --changed-paths when building the commit-graph, which adds Bloom filters that let git cheaply skip commits that did not touch a given path. This dramatically accelerates `git log -- <path>` and modestly helps `git blame` on large repos. Existing repos that already have a Bloom-less commit-graph get a one-time `--split=replace` rewrite on their next fetch, gated on a new `commitGraphChangedPathsBackfilledAt` timestamp stored in repo metadata. Subsequent fetches do a cheap incremental write. Also moves the `writeCommitGraph` call out of `cloneRepository` and into `RepoIndexManager.indexRepository` so clone and fetch paths handle the commit-graph symmetrically. Drops `--write-commit-graph` from the fetch invocation since that flag does not honor `--changed-paths`. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 99b9af5 commit 5fa4e71

5 files changed

Lines changed: 70 additions & 12 deletions

File tree

packages/backend/src/git.ts

Lines changed: 23 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -105,12 +105,6 @@ export const cloneRepository = async (
105105
keys: ["remote.origin.url"],
106106
signal,
107107
});
108-
109-
// @note: operations that need to iterate over a lot of commits (e.g., rev-list --count)
110-
// can be slow on larger repositories. Commit graphs are a acceleration structure that
111-
// speed up these operations.
112-
// @see: https://git-scm.com/docs/commit-graph
113-
await writeCommitGraph({ path, signal });
114108
} catch (error: unknown) {
115109
const baseLog = `Failed to clone repository: ${path}`;
116110

@@ -151,9 +145,6 @@ export const fetchRepository = async (
151145
"+refs/heads/*:refs/heads/*",
152146
"--prune",
153147
"--progress",
154-
// On fetch, ensure the commit graph is up to date.
155-
// @see: https://git-scm.com/docs/commit-graph
156-
"--write-commit-graph"
157148
]);
158149

159150
// Update HEAD to match the remote's default branch. This handles the case where the remote's
@@ -490,20 +481,42 @@ export const isRepoEmpty = async ({
490481
* Writes or updates the commit-graph file for the repository.
491482
* This pre-computes commit metadata to speed up operations like
492483
* rev-list --count, log, and merge-base.
484+
*
485+
* Also writes changed-path Bloom filters (--changed-paths), which let git
486+
* quickly skip commits that didn't touch a given path. This accelerates
487+
* `git log -- <path>` dramatically and `git blame` modestly on large repos.
488+
*
489+
* For incremental writes (the default), Bloom filters are only computed for
490+
* commits being added in this write. Repos that already have a Bloom-less
491+
* commit-graph from a prior version need a one-time `forceBackfill: true`
492+
* write to backfill filters for their historical commits.
493+
*
494+
* @see: https://git-scm.com/docs/commit-graph
493495
*/
494496
export const writeCommitGraph = async ({
495497
path,
498+
forceBackfill,
496499
onProgress,
497500
signal,
498501
}: {
499502
path: string,
503+
forceBackfill?: boolean,
500504
onProgress?: onProgressFn,
501505
signal?: AbortSignal,
502506
}): Promise<void> => {
503507
const git = createGitClientForPath(path, onProgress, signal);
504508

505509
try {
506-
await git.raw(['commit-graph', 'write', '--reachable']);
510+
const args = ['commit-graph', 'write', '--reachable', '--changed-paths'];
511+
if (forceBackfill) {
512+
// --split=replace consolidates any existing layers into a single new layer,
513+
// which forces git to recompute Bloom filters for every commit (not just commits
514+
// added since the last write). Used for the one-time migration of repos that have
515+
// a Bloom-less commit-graph from before --changed-paths was enabled.
516+
// @see: https://git-scm.com/docs/git-commit-graph#Documentation/git-commit-graph.txt-write
517+
args.push('--split=replace');
518+
}
519+
await git.raw(args);
507520
} catch (error) {
508521
// Don't throw an exception here since this is just a performance optimization.
509522
logger.debug(`Failed to write commit-graph for ${path}:`, error);

packages/backend/src/repoIndexManager.test.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ vi.mock('./git.js', () => ({
5151
isRepoEmpty: vi.fn().mockResolvedValue(false),
5252
unsetGitConfig: vi.fn(),
5353
upsertGitConfig: vi.fn(),
54+
writeCommitGraph: vi.fn(),
5455
}));
5556

5657
vi.mock('./zoekt.js', () => ({
@@ -178,6 +179,8 @@ const createMockSettings = (): Settings => ({
178179
enablePublicAccess: false,
179180
experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24,
180181
experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24,
182+
repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24,
183+
userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24,
181184
maxAccountPermissionSyncJobConcurrency: 8,
182185
maxRepoPermissionSyncJobConcurrency: 8,
183186
});

packages/backend/src/repoIndexManager.ts

Lines changed: 35 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ import { Redis } from 'ioredis';
88
import micromatch from 'micromatch';
99
import Redlock, { ExecutionError } from 'redlock';
1010
import { INDEX_CACHE_DIR, REPOS_CACHE_DIR, WORKER_STOP_GRACEFUL_TIMEOUT_MS } from './constants.js';
11-
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getLatestCommitTimestamp, getLocalDefaultBranch, getTags, isPathAValidGitRepoRoot, isRepoEmpty, unsetGitConfig, upsertGitConfig } from './git.js';
11+
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getLatestCommitTimestamp, getLocalDefaultBranch, getTags, isPathAValidGitRepoRoot, isRepoEmpty, unsetGitConfig, upsertGitConfig, writeCommitGraph } from './git.js';
1212
import { captureEvent } from './posthog.js';
1313
import { PromClient } from './promClient.js';
1414
import { RepoWithConnections, Settings } from "./types.js";
@@ -396,6 +396,19 @@ export class RepoIndexManager {
396396
const fetchDuration_s = durationMs / 1000;
397397

398398
logger.debug(`Fetched ${repo.name} (id: ${repo.id}) in ${fetchDuration_s}s`);
399+
400+
// Update the commit-graph after fetch. Force a full backfill the first time we
401+
// see this repo after the --changed-paths rollout, so historical commits get
402+
// Bloom filters. Subsequent fetches do a cheap incremental write.
403+
const needsBackfill = !metadata.commitGraphChangedPathsBackfilledAt;
404+
if (needsBackfill) {
405+
logger.debug(`Backfilling changed-path Bloom filters for ${repo.name} (id: ${repo.id})...`);
406+
}
407+
await writeCommitGraph({
408+
path: repoPath,
409+
forceBackfill: needsBackfill,
410+
signal,
411+
});
399412
} else if (!isReadOnly) {
400413
logger.debug(`Cloning ${repo.name} (id: ${repo.id})...`);
401414

@@ -411,6 +424,27 @@ export class RepoIndexManager {
411424
const cloneDuration_s = durationMs / 1000;
412425

413426
logger.debug(`Cloned ${repo.name} (id: ${repo.id}) in ${cloneDuration_s}s`);
427+
428+
// Write the commit-graph for the freshly cloned repo.
429+
await writeCommitGraph({
430+
path: repoPath,
431+
signal,
432+
});
433+
}
434+
435+
// Record that this repo's commit-graph now includes changed-path Bloom filters
436+
// for its full history (either freshly written during clone, or backfilled above
437+
// during fetch).
438+
if (!isReadOnly && !metadata.commitGraphChangedPathsBackfilledAt) {
439+
await this.db.repo.update({
440+
where: { id: repo.id },
441+
data: {
442+
metadata: {
443+
...metadata,
444+
commitGraphChangedPathsBackfilledAt: new Date().toISOString(),
445+
} satisfies RepoMetadata,
446+
},
447+
});
414448
}
415449

416450
// Regardless of clone or fetch, always upsert the git config for the repo.

packages/shared/src/types.ts

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,14 @@ export const repoMetadataSchema = z.object({
3232
*/
3333
indexedRevisions: z.array(z.string()).optional(),
3434

35+
/**
36+
* Timestamp of when changed-path Bloom filters were written into the
37+
* commit-graph for this repo's full history. Undefined means the one-time
38+
* backfill has not yet run, so historical commits still lack Bloom filters.
39+
* @see writeCommitGraph in packages/backend/src/git.ts
40+
*/
41+
commitGraphChangedPathsBackfilledAt: z.string().datetime().optional(),
42+
3543
/**
3644
* Code host specific metadata, keyed by code host type.
3745
*/

packages/web/src/app/(app)/search/components/codePreviewPanel/codePreview.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ export const CodePreview = ({
112112

113113
return (
114114
<div className="flex flex-col h-full">
115-
<div className="flex flex-row bg-accent items-center justify-between pr-3 py-0.5 mt-7">
115+
<div className="flex flex-row bg-accent items-center justify-between pr-3 py-[3px] mt-7">
116116

117117
{/* Gutter icon */}
118118
<div className="flex flex-row">

0 commit comments

Comments
 (0)