From 8104c79042bc2998e71bfe3f5e6b556fdbac1b7d Mon Sep 17 00:00:00 2001 From: Omar Ramadan Date: Mon, 18 May 2026 04:01:23 +0000 Subject: [PATCH] fix(sync-gbrain): fall through to filesystem walk when source has 0 code pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `/sync-gbrain --full` was a silent no-op on sources that had never been initially synced. The orchestrator's --full path routed to `gbrain reindex-code`, which only re-chunks existing `type='code'` pages — it does NOT walk the filesystem (see reindex-code.ts docstring: "Explicit backfill for v0.19.0 → v0.21.0 brains... walk every page where type='code'"). For a source with zero code pages, reindex-code exits 0 with `codePages: 0` and the orchestrator reports success despite indexing nothing. Side-by-side proof on the same source ID: gbrain sync --strategy code --source <source-id> --dry-run -> 14,799 files gbrain reindex-code --source <source-id> --dry-run -> 0 code page(s) Fix: probe the source's existing code page count via `reindex-code --dry-run --json` before deciding the subcommand. If zero pages (or probe fails), fall through to `sync --strategy code` which does the filesystem walk via `performFullSync`. Preserves the v0.21.0 backfill semantics for sources that already have code pages. Repro: 1. Fresh source: `gbrain sources add my-repo --path ~/my-repo --federated` 2. `/sync-gbrain --full` (or invoke orchestrator directly) 3. Before this fix: completes in ~10s, page_count=0 4. After this fix: walks the filesystem as expected Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/gstack-gbrain-sync.ts | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/bin/gstack-gbrain-sync.ts b/bin/gstack-gbrain-sync.ts index 732ee430c4..bf0e19bfba 100644 --- a/bin/gstack-gbrain-sync.ts +++ b/bin/gstack-gbrain-sync.ts @@ -401,8 +401,32 @@ async function runCodeImport(args: CliArgs): Promise { }; } - // Step 2: Run sync or reindex. 
- const syncArgs = args.mode === "full" + // Step 2: Choose between filesystem walk and existing-page reindex. + // `gbrain reindex-code` only re-chunks existing `type='code'` pages — it + // does NOT walk the filesystem (see reindex-code.ts docstring). For a + // source that has never been initially synced, reindex-code is a silent + // no-op. So for --full we first probe the source's existing code page + // count; if zero, fall back to `sync --strategy code` which walks the + // filesystem via performFullSync. Preserves the v0.21.0 backfill + // semantics for sources that already have code pages. + let useReindex = false; + if (args.mode === "full") { + const probe = spawnSync( + "gbrain", + ["reindex-code", "--source", sourceId, "--dry-run", "--json"], + { encoding: "utf-8", timeout: 30_000, stdio: ["ignore", "pipe", "pipe"] }, + ); + if (probe.status === 0) { + try { + const out = JSON.parse(probe.stdout.trim()); + useReindex = typeof out.codePages === "number" && out.codePages > 0; + } catch { + // Unparseable JSON → fall through to sync (safe default). + } + } + // Probe failure (non-zero exit) → fall through to sync (safe default). + } + const syncArgs = useReindex ? ["reindex-code", "--source", sourceId, "--yes"] : ["sync", "--strategy", "code", "--source", sourceId];