From aceb80299d41381661b874158a8343287ca8aab0 Mon Sep 17 00:00:00 2001 From: jstuart0 Date: Wed, 6 May 2026 21:40:40 -0400 Subject: [PATCH 1/3] fix(livingwiki): batch caller lookups in GraphStoreMetrics (CA-171) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit repoReferenceCount and packageReferenceCount issued one SurrealDB websocket round-trip per caller via GetSymbol(callerID) inside a nested loop. On any non-trivial repo this stalled Living Wiki page generation indefinitely — the page goroutine ground through the per-call websocket queue while the errgroup semaphore (clamped to MaxConcurrency=1 by the upstream LLM capacity provider) blocked every subsequent page. Replaces the inner GetSymbol-per-callerID with a single GetSymbolsByIDs batch fetch (helper already exists in internal/db/store.go:1875). The outer GetCallers-per-symbol N+1 remains and is tracked separately; that fix needs a new GetCallersByIDs store method. Caught via pprof goroutine dump (gorillaws.Connection.Call [select]) during CA-169 deploy validation. Refs CA-171. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../livingwiki/orchestrator/graphmetrics.go | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/internal/livingwiki/orchestrator/graphmetrics.go b/internal/livingwiki/orchestrator/graphmetrics.go index 837e25b3..924f3546 100644 --- a/internal/livingwiki/orchestrator/graphmetrics.go +++ b/internal/livingwiki/orchestrator/graphmetrics.go @@ -72,9 +72,14 @@ func (m *GraphStoreMetrics) GraphRelationCount(repoID, pageID string) int { // packageReferenceCount counts distinct caller packages importing any symbol // in pkg within the given repository. +// +// CA-171: replaced inner GetSymbol-per-callerID with one GetSymbolsByIDs batch +// to eliminate the SurrealDB N+1 that stalled Living Wiki page generation. 
+// The outer GetCallers-per-symbol N+1 remains; that requires a new store +// helper and is tracked separately. func (m *GraphStoreMetrics) packageReferenceCount(repoID, pkg string) int { syms, _ := m.store.GetSymbols(repoID, nil, nil, 0, 0) - callerPkgs := make(map[string]bool) + callerIDSet := make(map[string]struct{}) for _, sym := range syms { if sym.FilePath == "" { continue @@ -83,14 +88,25 @@ func (m *GraphStoreMetrics) packageReferenceCount(repoID, pkg string) int { continue } for _, callerID := range m.store.GetCallers(sym.ID) { - caller := m.store.GetSymbol(callerID) - if caller == nil { - continue - } - callerPkg := filePackage(caller.FilePath) - if callerPkg != pkg { - callerPkgs[callerPkg] = true - } + callerIDSet[callerID] = struct{}{} + } + } + if len(callerIDSet) == 0 { + return 0 + } + callerIDs := make([]string, 0, len(callerIDSet)) + for id := range callerIDSet { + callerIDs = append(callerIDs, id) + } + callers := m.store.GetSymbolsByIDs(callerIDs) + callerPkgs := make(map[string]bool) + for _, caller := range callers { + if caller == nil { + continue + } + callerPkg := filePackage(caller.FilePath) + if callerPkg != pkg { + callerPkgs[callerPkg] = true } } return len(callerPkgs) @@ -110,16 +126,31 @@ func (m *GraphStoreMetrics) packageRelationCount(repoID, pkg string) int { } // repoReferenceCount aggregates reference counts across all packages in the repo. +// +// CA-171: same N+1 fix as packageReferenceCount — batch-fetch callers once +// instead of one GetSymbol per callerID. 
func (m *GraphStoreMetrics) repoReferenceCount(repoID string) int { syms, _ := m.store.GetSymbols(repoID, nil, nil, 0, 0) - callerPkgs := make(map[string]bool) + callerIDSet := make(map[string]struct{}) for _, sym := range syms { for _, callerID := range m.store.GetCallers(sym.ID) { - caller := m.store.GetSymbol(callerID) - if caller != nil { - callerPkgs[filePackage(caller.FilePath)] = true - } + callerIDSet[callerID] = struct{}{} + } + } + if len(callerIDSet) == 0 { + return 0 + } + callerIDs := make([]string, 0, len(callerIDSet)) + for id := range callerIDSet { + callerIDs = append(callerIDs, id) + } + callers := m.store.GetSymbolsByIDs(callerIDs) + callerPkgs := make(map[string]bool) + for _, caller := range callers { + if caller == nil { + continue } + callerPkgs[filePackage(caller.FilePath)] = true } return len(callerPkgs) } From 027f23493c042b01304b254b0ccb4c80057fae72 Mon Sep 17 00:00:00 2001 From: jstuart0 Date: Wed, 6 May 2026 21:40:40 -0400 Subject: [PATCH 2/3] chore(api): expose net/http/pprof behind SOURCEBRIDGE_PPROF_ENABLED Adds dev-only Go pprof endpoints under /debug/pprof/* gated by an env var (default false) so a goroutine dump can be captured against a hung job without rebuilding. Registered on the main router after the rate limiter middleware, so dump requests still count against the 100 req/min per-IP limit. Compose forwards the env var so SOURCEBRIDGE_PPROF_ENABLED=true on the host enables it for local stacks. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docker-compose.yml | 1 + internal/api/rest/router.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 84c1e63d..2efcce30 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -82,6 +82,7 @@ services: ports: - "${SOURCEBRIDGE_API_PORT:-8080}:8080" environment: + - SOURCEBRIDGE_PPROF_ENABLED=${SOURCEBRIDGE_PPROF_ENABLED:-false} - SOURCEBRIDGE_STORAGE_SURREAL_MODE=external - SOURCEBRIDGE_STORAGE_SURREAL_URL=ws://surrealdb:8000/rpc - SOURCEBRIDGE_STORAGE_SURREAL_NAMESPACE=sourcebridge diff --git a/internal/api/rest/router.go b/internal/api/rest/router.go index 37ac719c..f51cc276 100644 --- a/internal/api/rest/router.go +++ b/internal/api/rest/router.go @@ -7,6 +7,8 @@ import ( "context" "log/slog" "net/http" + "net/http/pprof" //nolint:gosec // dev-only profiling, gated behind SOURCEBRIDGE_PPROF_ENABLED + "os" "strconv" "strings" "sync" @@ -735,6 +737,23 @@ func (s *Server) setupRouter() { // Rate limiting r.Use(httprate.LimitByIP(100, 1*time.Minute)) + // pprof — gated behind SOURCEBRIDGE_PPROF_ENABLED=true. Registered after the + // global rate limiter middleware, so these routes share its per-IP limit. + // Dev-only; never enable on a public-facing deployment. 
+ if os.Getenv("SOURCEBRIDGE_PPROF_ENABLED") == "true" { + r.HandleFunc("/debug/pprof/", pprof.Index) + r.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + r.HandleFunc("/debug/pprof/profile", pprof.Profile) + r.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + r.HandleFunc("/debug/pprof/trace", pprof.Trace) + r.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine")) + r.Handle("/debug/pprof/heap", pprof.Handler("heap")) + r.Handle("/debug/pprof/allocs", pprof.Handler("allocs")) + r.Handle("/debug/pprof/block", pprof.Handler("block")) + r.Handle("/debug/pprof/mutex", pprof.Handler("mutex")) + r.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate")) + } + // Public routes r.Get("/healthz", s.handleHealthz) r.Get("/readyz", s.handleReadyz) From 0449873faff1c1b9ca682514adfd723570f131de Mon Sep 17 00:00:00 2001 From: jstuart0 Date: Wed, 6 May 2026 21:51:16 -0400 Subject: [PATCH 3/3] fix(livingwiki): collapse all GraphStoreMetrics N+1 queries to GetCallEdges (CA-171) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial fix only batched the inner GetSymbol-per-callerID lookup. The outer GetCallers-per-symbol N+1 still stalled the same way (confirmed by a second pprof goroutine dump after the first deploy). GraphStore already exposes GetCallEdges(repoID) which returns every caller→callee edge for the repo in a single query. Rewrites all four metric functions to use it: - packageReferenceCount: filter edges by callee membership in pkg, then one GetSymbolsByIDs batch for callers. - packageRelationCount: count edges whose callee is in pkg. - repoReferenceCount: one GetCallEdges + one GetSymbolsByIDs batch. - repoRelationCount: len(GetCallEdges(repoID)). For a repo with N symbols and K avg callers per symbol, this collapses O(N*K + 1) sequential SurrealDB round-trips into 2-3 total. Refs CA-171. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../livingwiki/orchestrator/graphmetrics.go | 71 +++++++++++-------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/internal/livingwiki/orchestrator/graphmetrics.go b/internal/livingwiki/orchestrator/graphmetrics.go index 924f3546..ff9024a7 100644 --- a/internal/livingwiki/orchestrator/graphmetrics.go +++ b/internal/livingwiki/orchestrator/graphmetrics.go @@ -73,23 +73,30 @@ func (m *GraphStoreMetrics) GraphRelationCount(repoID, pageID string) int { // packageReferenceCount counts distinct caller packages importing any symbol // in pkg within the given repository. // -// CA-171: replaced inner GetSymbol-per-callerID with one GetSymbolsByIDs batch -// to eliminate the SurrealDB N+1 that stalled Living Wiki page generation. -// The outer GetCallers-per-symbol N+1 remains; that requires a new store -// helper and is tracked separately. +// CA-171: replaces the prior O(symbols × callers) sequential SurrealDB +// round-trips with two queries — GetSymbols + GetCallEdges — and one +// GetSymbolsByIDs batch for the resolved callers. Same logical result. 
func (m *GraphStoreMetrics) packageReferenceCount(repoID, pkg string) int { syms, _ := m.store.GetSymbols(repoID, nil, nil, 0, 0) - callerIDSet := make(map[string]struct{}) + pkgSymIDs := make(map[string]struct{}, len(syms)) for _, sym := range syms { if sym.FilePath == "" { continue } - if !symbolInPackage(sym.FilePath, pkg) { - continue + if symbolInPackage(sym.FilePath, pkg) { + pkgSymIDs[sym.ID] = struct{}{} } - for _, callerID := range m.store.GetCallers(sym.ID) { - callerIDSet[callerID] = struct{}{} + } + if len(pkgSymIDs) == 0 { + return 0 + } + edges := m.store.GetCallEdges(repoID) + callerIDSet := make(map[string]struct{}) + for _, e := range edges { + if _, ok := pkgSymIDs[e.CalleeID]; !ok { + continue } + callerIDSet[e.CallerID] = struct{}{} } if len(callerIDSet) == 0 { return 0 @@ -113,33 +120,42 @@ func (m *GraphStoreMetrics) packageReferenceCount(repoID, pkg string) int { } // packageRelationCount counts total inbound call-graph edges to symbols in pkg. +// +// CA-171: uses GetCallEdges + a package-membership filter instead of one +// GetCallers query per symbol. func (m *GraphStoreMetrics) packageRelationCount(repoID, pkg string) int { syms, _ := m.store.GetSymbols(repoID, nil, nil, 0, 0) - total := 0 + pkgSymIDs := make(map[string]struct{}, len(syms)) for _, sym := range syms { - if !symbolInPackage(sym.FilePath, pkg) { - continue + if symbolInPackage(sym.FilePath, pkg) { + pkgSymIDs[sym.ID] = struct{}{} + } + } + if len(pkgSymIDs) == 0 { + return 0 + } + total := 0 + for _, e := range m.store.GetCallEdges(repoID) { + if _, ok := pkgSymIDs[e.CalleeID]; ok { + total++ } - total += len(m.store.GetCallers(sym.ID)) } return total } // repoReferenceCount aggregates reference counts across all packages in the repo. // -// CA-171: same N+1 fix as packageReferenceCount — batch-fetch callers once -// instead of one GetSymbol per callerID. +// CA-171: collapsed N×K SurrealDB round-trips into GetCallEdges + one +// GetSymbolsByIDs batch. 
func (m *GraphStoreMetrics) repoReferenceCount(repoID string) int { - syms, _ := m.store.GetSymbols(repoID, nil, nil, 0, 0) - callerIDSet := make(map[string]struct{}) - for _, sym := range syms { - for _, callerID := range m.store.GetCallers(sym.ID) { - callerIDSet[callerID] = struct{}{} - } - } - if len(callerIDSet) == 0 { + edges := m.store.GetCallEdges(repoID) + if len(edges) == 0 { return 0 } + callerIDSet := make(map[string]struct{}, len(edges)) + for _, e := range edges { + callerIDSet[e.CallerID] = struct{}{} + } callerIDs := make([]string, 0, len(callerIDSet)) for id := range callerIDSet { callerIDs = append(callerIDs, id) @@ -156,13 +172,10 @@ func (m *GraphStoreMetrics) repoReferenceCount(repoID string) int { } // repoRelationCount counts all inbound call edges for the repo. +// +// CA-171: replaced GetCallers-per-symbol with a single GetCallEdges query. func (m *GraphStoreMetrics) repoRelationCount(repoID string) int { - syms, _ := m.store.GetSymbols(repoID, nil, nil, 0, 0) - total := 0 - for _, sym := range syms { - total += len(m.store.GetCallers(sym.ID)) - } - return total + return len(m.store.GetCallEdges(repoID)) } // pageSubject extracts the package path from an architecture page ID.