From d55e6ccfa7839e3a02a99374f28521f35ba18706 Mon Sep 17 00:00:00 2001 From: shivasurya Date: Sat, 2 May 2026 12:34:37 -0400 Subject: [PATCH] feat(callgraph): C call graph builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add BuildCCallGraph — a four-pass algorithm that produces a *core.CallGraph for C projects: Pass 1 Index every C function_definition under "relpath::name" and ensure the FQN appears in the module registry's FunctionIndex for cross-file resolution. Pass 2 Register explicit return types with the type engine (skipping void) and emit ParameterSymbol entries for every named parameter. Pass 3 Walk the parser-emitted edges (function_definition → call_expression) to extract one CallSiteInternal per call, deterministically and without a second AST traversal. Pass 4 Resolve targets in a definition-preferring order: same-file definition → global definition → same-file declaration → declaration reachable through #include "...". Resolved sites add an edge; unresolved sites are recorded as CallSite{Resolved:false} so external/stdlib calls remain visible to rule writers. The result merges cleanly into a unified graph via the existing MergeCallGraphs since C FQNs ("src/main.c::main") share no namespace with Python, Go, or Java.
Co-Authored-By: Claude Sonnet 4.5 --- .../graph/callgraph/builder/c_builder.go | 391 ++++++++++++++++ .../graph/callgraph/builder/c_builder_test.go | 433 ++++++++++++++++++ 2 files changed, 824 insertions(+) create mode 100644 sast-engine/graph/callgraph/builder/c_builder.go create mode 100644 sast-engine/graph/callgraph/builder/c_builder_test.go diff --git a/sast-engine/graph/callgraph/builder/c_builder.go b/sast-engine/graph/callgraph/builder/c_builder.go new file mode 100644 index 00000000..19d51403 --- /dev/null +++ b/sast-engine/graph/callgraph/builder/c_builder.go @@ -0,0 +1,391 @@ +package builder + +import ( + "slices" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution" +) + +// languageC is the Node.Language tag emitted by the C parser. Mirrors +// `parser_c.go:languageC`; duplicated here to avoid importing the +// parser package into a builder. +const languageC = "c" + +// metaIsDeclaration is the Node.Metadata key set on function nodes that +// represent declarations (prototypes / extern decls) rather than +// definitions. Mirrors `parser_c.go:metaIsDeclaration`. +const metaIsDeclaration = "is_declaration" + +// fqnSeparator is the C/C++ FQN delimiter — `relpath::funcname`. +const fqnSeparator = "::" + +// declarationConfidenceSource is the TypeInfo.Source used for return +// types lifted from explicit declarations. +const declarationConfidenceSource = "declaration" + +// resolutionFailedExternal is the CallSite.FailureReason used when a +// callee cannot be resolved within the project. C code calling stdlib +// (`printf`, `malloc`) and unknown function pointers both end up here. +const resolutionFailedExternal = "external_or_unresolved" + +// BuildCCallGraph constructs the C call graph using a four-pass +// algorithm. 
The result is a stand-alone `*core.CallGraph` that can be +// merged into a unified graph via `MergeCallGraphs`. +// +// Passes: +// +// Pass 1 — index every C function_definition under its FQN +// ("relpath::name") and populate registry.FunctionIndex for +// cross-file lookup. Declarations are kept with +// Metadata["is_declaration"]=true so Pass 4 can prefer +// definitions. +// +// Pass 2 — populate return-type and parameter symbol tables from the +// already-parsed AST nodes (no AST walk). The type engine +// records every non-void return type with Confidence=1.0. +// +// Pass 3 — emit one CallSiteInternal per call_expression child of a +// function_definition. Edges from the parser already link +// each call to its enclosing function, so this pass is a +// single deterministic walk over those edges. +// +// Pass 4 — resolve each call site to a concrete FQN using the +// definition-preferring search order documented on +// resolveCCallTarget. Resolved sites add an edge; unresolved +// sites are stored as CallSite{Resolved:false} for +// diagnostics. +// +// Parameters: +// - codeGraph: parsed graph from graph.Initialize. Must be non-nil. +// - registry: C module registry from PR-05. Must be non-nil. +// - typeEngine: C type inference engine from PR-06. Must be non-nil. +// +// Returns a fully-populated *core.CallGraph and a nil error. Errors are +// reserved for future failure modes (e.g. cancelled context); the +// current implementation never returns one. 
+func BuildCCallGraph( + codeGraph *graph.CodeGraph, + registry *core.CModuleRegistry, + typeEngine *resolution.CTypeInferenceEngine, +) (*core.CallGraph, error) { + callGraph := core.NewCallGraph() + if codeGraph == nil || registry == nil { + return callGraph, nil + } + + indexCFunctions(codeGraph, callGraph, registry) + extractCFunctionTypes(callGraph, typeEngine) + callSites := extractCCallSites(callGraph) + resolveCCallSites(callSites, callGraph, registry) + + return callGraph, nil +} + +// ============================================================================= +// Pass 1 — index functions +// ============================================================================= + +// indexCFunctions records every C function_definition node in +// callGraph.Functions under "relpath::name" and ensures the same FQN +// appears in registry.FunctionIndex (so `BuildCCallGraph` works even +// when the registry was constructed before the parser populated all +// node metadata). +func indexCFunctions(codeGraph *graph.CodeGraph, callGraph *core.CallGraph, registry *core.CModuleRegistry) { + for _, node := range codeGraph.Nodes { + if !isCFunctionNode(node) { + continue + } + prefix, ok := registry.FileToPrefix[node.File] + if !ok { + continue + } + fqn := prefix + fqnSeparator + node.Name + callGraph.Functions[fqn] = node + appendUniqueFQN(registry.FunctionIndex, node.Name, fqn) + } +} + +// isCFunctionNode is true when node is a C function_definition with a +// usable name. Anonymous declarations (rare in practice) are skipped. +func isCFunctionNode(node *graph.Node) bool { + return node != nil && + node.Language == languageC && + node.Type == "function_definition" && + node.Name != "" && + node.File != "" +} + +// appendUniqueFQN adds fqn to index[name] unless it is already present. 
+func appendUniqueFQN(index map[string][]string, name, fqn string) { + if slices.Contains(index[name], fqn) { + return + } + index[name] = append(index[name], fqn) +} + +// ============================================================================= +// Pass 2 — extract return types and parameter symbols +// ============================================================================= + +// extractCFunctionTypes registers every function's return type with the +// type engine and records each parameter as a typed ParameterSymbol on +// the call graph. Both pieces of metadata are consumed by later passes +// (resolution and rule-engine queries). +func extractCFunctionTypes(callGraph *core.CallGraph, typeEngine *resolution.CTypeInferenceEngine) { + for fqn, node := range callGraph.Functions { + if isDeclaration(node) { + continue + } + if typeEngine != nil { + typeEngine.ExtractReturnType(fqn, node.ReturnType) + } + registerCParameters(callGraph, fqn, node) + } +} + +// registerCParameters writes one ParameterSymbol per declared parameter +// of node into callGraph.Parameters. Parameters with no name (anonymous +// or void) are skipped — they cannot be referenced and would only +// pollute symbol queries. +func registerCParameters(callGraph *core.CallGraph, fqn string, node *graph.Node) { + if node == nil { + return + } + for i, paramName := range node.MethodArgumentsValue { + if paramName == "" { + continue + } + typeAnnotation := "" + if i < len(node.MethodArgumentsType) { + typeAnnotation = node.MethodArgumentsType[i] + } + paramFQN := fqn + "." 
+ paramName + callGraph.Parameters[paramFQN] = &core.ParameterSymbol{ + Name: paramName, + TypeAnnotation: typeAnnotation, + ParentFQN: fqn, + File: node.File, + Line: node.LineNumber, + } + } +} + +// ============================================================================= +// Pass 3 — extract call sites +// ============================================================================= + +// extractCCallSites walks the outgoing edges added by the parser +// (`function_definition → call_expression`) and emits one +// CallSiteInternal per call. The parser already links every call to +// its enclosing function, so this pass is deterministic and avoids a +// second AST traversal. +func extractCCallSites(callGraph *core.CallGraph) []*CallSiteInternal { + sites := make([]*CallSiteInternal, 0) + for callerFQN, fnNode := range callGraph.Functions { + if isDeclaration(fnNode) { + continue + } + for _, edge := range fnNode.OutgoingEdges { + callNode := edge.To + if !isCCallNode(callNode) { + continue + } + sites = append(sites, &CallSiteInternal{ + CallerFQN: callerFQN, + CallerFile: fnNode.File, + CallLine: callNode.LineNumber, + FunctionName: callNode.Name, + ObjectName: stringMetadata(callNode, "receiver"), + Arguments: append([]string(nil), callNode.MethodArgumentsValue...), + }) + } + } + return sites +} + +// isCCallNode is true when node represents a C call_expression with a +// usable target name. +func isCCallNode(node *graph.Node) bool { + return node != nil && + node.Language == languageC && + node.Type == "call_expression" && + node.Name != "" +} + +// stringMetadata returns the string at node.Metadata[key], or "" when +// the key is missing or the value is not a string. 
+func stringMetadata(node *graph.Node, key string) string { + if node == nil || node.Metadata == nil { + return "" + } + v, ok := node.Metadata[key].(string) + if !ok { + return "" + } + return v +} + +// ============================================================================= +// Pass 4 — resolve call sites +// ============================================================================= + +// resolveCCallSites attempts to resolve each call site to a concrete +// FQN. Resolved sites add a forward edge in the call graph and record a +// CallSite{Resolved:true} entry; unresolved sites are still recorded so +// rule writers and diagnostics can see external/unknown calls. +func resolveCCallSites(sites []*CallSiteInternal, callGraph *core.CallGraph, registry *core.CModuleRegistry) { + for _, cs := range sites { + targetFQN, resolved := resolveCCallTarget(cs, callGraph, registry) + callSite := buildCCallSite(cs, targetFQN, resolved) + callGraph.AddCallSite(cs.CallerFQN, callSite) + if resolved { + callGraph.AddEdge(cs.CallerFQN, targetFQN) + } + } +} + +// resolveCCallTarget implements the definition-preferring resolution +// strategy. The order is intentional and is documented inline so future +// maintainers can adjust it without re-reading the spec: +// +// 1. Same-file lookup — the most common pattern (helper function in +// the same .c file). Always wins because it is deterministic and +// does not depend on include-resolution state. +// 2. Global definition lookup — scan registry.FunctionIndex for an +// FQN that the call graph knows is a definition (not just a +// declaration). This handles cross-file calls into another .c file. +// 3. Same-file declaration — accept a same-file declaration when no +// definition exists project-wide (forward decls). +// 4. Included declaration — fall back to a header declaration +// reachable through `#include "..."`. 
The edge points at the +// declaration FQN; later phases can still treat it as the entry +// point for a stdlib/third-party call. +// +// Returns ("", false) when no candidate matches. +func resolveCCallTarget( + cs *CallSiteInternal, + callGraph *core.CallGraph, + registry *core.CModuleRegistry, +) (string, bool) { + if cs.FunctionName == "" { + return "", false + } + + if fqn, ok := lookupSameFile(cs.CallerFile, cs.FunctionName, registry, callGraph, true); ok { + return fqn, true + } + if fqn, ok := lookupGlobalDefinition(cs.FunctionName, registry, callGraph); ok { + return fqn, true + } + if fqn, ok := lookupSameFile(cs.CallerFile, cs.FunctionName, registry, callGraph, false); ok { + return fqn, true + } + if fqn, ok := lookupViaIncludes(cs.CallerFile, cs.FunctionName, registry, callGraph); ok { + return fqn, true + } + return "", false +} + +// lookupSameFile returns the FQN of a function named `name` declared in +// callerFile. When definitionsOnly is true, only definitions are +// returned (declarations are skipped); when false, any matching node +// is accepted. +func lookupSameFile( + callerFile, name string, + registry *core.CModuleRegistry, + callGraph *core.CallGraph, + definitionsOnly bool, +) (string, bool) { + prefix, ok := registry.FileToPrefix[callerFile] + if !ok { + return "", false + } + fqn := prefix + fqnSeparator + name + node, ok := callGraph.Functions[fqn] + if !ok { + return "", false + } + if definitionsOnly && isDeclaration(node) { + return "", false + } + return fqn, true +} + +// lookupGlobalDefinition scans every FQN registered for `name` in the +// module registry and returns the first one whose call-graph entry is +// a definition. Order follows registry.FunctionIndex insertion order +// (stable across runs because the registry walks codeGraph.Nodes which +// is append-only during build). 
+func lookupGlobalDefinition(name string, registry *core.CModuleRegistry, callGraph *core.CallGraph) (string, bool) { + for _, candidate := range registry.FunctionIndex[name] { + node, ok := callGraph.Functions[candidate] + if !ok || isDeclaration(node) { + continue + } + return candidate, true + } + return "", false +} + +// lookupViaIncludes searches the headers transitively included by +// callerFile for a declaration of `name`. Used as a last resort so +// edges to declared-but-undefined functions (e.g. an extern handed off +// to another translation unit) still appear in the graph. +func lookupViaIncludes( + callerFile, name string, + registry *core.CModuleRegistry, + callGraph *core.CallGraph, +) (string, bool) { + callerPrefix, ok := registry.FileToPrefix[callerFile] + if !ok { + return "", false + } + for _, includedRel := range registry.Includes[callerPrefix] { + fqn := includedRel + fqnSeparator + name + if _, exists := callGraph.Functions[fqn]; exists { + return fqn, true + } + } + return "", false +} + +// ============================================================================= +// Helpers +// ============================================================================= + +// isDeclaration reports whether node is a function declaration (no +// body) rather than a definition. Used by Pass 4's resolution order to +// prefer the FQN backed by an actual function body. +func isDeclaration(node *graph.Node) bool { + if node == nil { + return false + } + v, ok := node.Metadata[metaIsDeclaration].(bool) + return ok && v +} + +// buildCCallSite composes a core.CallSite from the internal record and +// the resolution outcome. Tracking unresolved calls (rather than +// dropping them) enables stdlib/third-party rules to inspect external +// invocations. 
+func buildCCallSite(cs *CallSiteInternal, targetFQN string, resolved bool) core.CallSite { + site := core.CallSite{ + Target: cs.FunctionName, + Location: core.Location{File: cs.CallerFile, Line: int(cs.CallLine)}, + Arguments: buildCallSiteArguments(cs.Arguments), + Resolved: resolved, + } + if resolved { + site.TargetFQN = targetFQN + // Confidence 1.0 because resolution went through the FQN + // registry, not type inference. Source kept consistent with + // the explicit-types convention used by the type engine. + site.TypeConfidence = 1.0 + site.TypeSource = declarationConfidenceSource + } else { + site.FailureReason = resolutionFailedExternal + } + return site +} diff --git a/sast-engine/graph/callgraph/builder/c_builder_test.go b/sast-engine/graph/callgraph/builder/c_builder_test.go new file mode 100644 index 00000000..48d64e0f --- /dev/null +++ b/sast-engine/graph/callgraph/builder/c_builder_test.go @@ -0,0 +1,433 @@ +package builder_test + +import ( + "testing" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/builder" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// cFixture builds a small parsed CodeGraph + CModuleRegistry to drive +// BuildCCallGraph in unit tests. Tests opt into specific node shapes by +// passing nodes into the helpers; the registry is wired automatically +// from the function set so cross-file resolution works. +type cFixture struct { + root string + cg *graph.CodeGraph + registry *core.CModuleRegistry + functions map[string]*graph.Node +} + +// fixtureRoot is the absolute project root used by every cFixture. +// Tests share it so file paths in assertions stay readable. 
+const fixtureRoot = "/projects/app" + +func newCFixture(t *testing.T) *cFixture { + t.Helper() + return &cFixture{ + root: fixtureRoot, + cg: graph.NewCodeGraph(), + registry: core.NewCModuleRegistry(fixtureRoot), + functions: make(map[string]*graph.Node), + } +} + +// addFunction registers a function_definition (or declaration) under +// the given absolute file path. relPath is the project-relative form +// the registry would compute. Returns the node so callers can attach +// outgoing call edges. +func (f *cFixture) addFunction(t *testing.T, file, relPath, name, returnType string, isDecl bool) *graph.Node { + t.Helper() + node := &graph.Node{ + ID: "fn:" + relPath + "::" + name, + Type: "function_definition", + Name: name, + File: file, + Language: "c", + ReturnType: returnType, + LineNumber: 1, + } + if isDecl { + node.Metadata = map[string]any{"is_declaration": true} + } + f.cg.AddNode(node) + f.registry.FileToPrefix[file] = relPath + fqn := relPath + "::" + name + f.functions[fqn] = node + f.registry.FunctionIndex[name] = append(f.registry.FunctionIndex[name], fqn) + return node +} + +// addCall attaches a call_expression node to caller, mimicking the +// edge the parser emits during AST traversal. 
+func (f *cFixture) addCall(t *testing.T, caller *graph.Node, target string, args []string) { + t.Helper() + call := &graph.Node{ + ID: "call:" + caller.ID + "->" + target, + Type: "call_expression", + Name: target, + File: caller.File, + Language: "c", + LineNumber: caller.LineNumber + 1, + MethodArgumentsValue: args, + } + f.cg.AddNode(call) + f.cg.AddEdge(caller, call) +} + +func (f *cFixture) build(t *testing.T) (*core.CallGraph, *resolution.CTypeInferenceEngine) { + t.Helper() + engine := resolution.NewCTypeInferenceEngine(f.registry) + cg, err := builder.BuildCCallGraph(f.cg, f.registry, engine) + require.NoError(t, err) + require.NotNil(t, cg) + return cg, engine +} + +// TestBuildCCallGraph_NilInputs verifies the builder degrades gracefully +// when given a nil CodeGraph or registry — useful for callers that +// guard upstream errors with optional chaining. +func TestBuildCCallGraph_NilInputs(t *testing.T) { + cg, err := builder.BuildCCallGraph(nil, nil, nil) + require.NoError(t, err) + require.NotNil(t, cg) + assert.Empty(t, cg.Functions) + assert.Empty(t, cg.Edges) +} + +// TestBuildCCallGraph_SingleFile_BasicEdge covers the simplest call +// graph: main() calls add(); both are in the same .c file. The edge +// must resolve to the same-file FQN. 
+func TestBuildCCallGraph_SingleFile_BasicEdge(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addFunction(t, mainC, "src/main.c", "add", "int", false) + f.addCall(t, mainFn, "add", []string{"1", "2"}) + + cg, _ := f.build(t) + + assert.Contains(t, cg.Functions, "src/main.c::main") + assert.Contains(t, cg.Functions, "src/main.c::add") + assert.Equal(t, []string{"src/main.c::add"}, cg.Edges["src/main.c::main"]) + assert.Equal(t, []string{"src/main.c::main"}, cg.ReverseEdges["src/main.c::add"]) + + sites := cg.CallSites["src/main.c::main"] + require.Len(t, sites, 1) + assert.True(t, sites[0].Resolved) + assert.Equal(t, "src/main.c::add", sites[0].TargetFQN) + assert.Equal(t, "add", sites[0].Target) + assert.InDelta(t, 1.0, sites[0].TypeConfidence, 1e-6) + assert.Equal(t, "declaration", sites[0].TypeSource) + assert.Empty(t, sites[0].FailureReason) + require.Len(t, sites[0].Arguments, 2) + assert.Equal(t, "1", sites[0].Arguments[0].Value) + assert.False(t, sites[0].Arguments[0].IsVariable, "numeric literal must not be flagged as a variable") +} + +// TestBuildCCallGraph_PrefersDefinitionOverDeclaration covers the +// definition-preferring resolution order: when the same function name +// appears as both a header declaration and a source-file definition, +// the edge must point at the definition. +func TestBuildCCallGraph_PrefersDefinitionOverDeclaration(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + utilsH := root + "/include/utils.h" + utilsC := root + "/src/utils.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + // Header declaration (no body). + f.addFunction(t, utilsH, "include/utils.h", "create_buffer", "Buffer*", true) + // Source definition (with body). 
+ f.addFunction(t, utilsC, "src/utils.c", "create_buffer", "Buffer*", false) + + f.addCall(t, mainFn, "create_buffer", nil) + + cg, _ := f.build(t) + + require.Len(t, cg.Edges["src/main.c::main"], 1) + assert.Equal(t, "src/utils.c::create_buffer", cg.Edges["src/main.c::main"][0], + "resolver must prefer the .c definition over the .h declaration") +} + +// TestBuildCCallGraph_DeclarationFallbackThroughIncludes confirms that +// when no project-wide definition exists, the resolver falls back to a +// declaration reachable through #include "...". +func TestBuildCCallGraph_DeclarationFallbackThroughIncludes(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + apiH := root + "/include/api.h" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addFunction(t, apiH, "include/api.h", "external_op", "int", true) + f.registry.Includes["src/main.c"] = []string{"include/api.h"} + + f.addCall(t, mainFn, "external_op", nil) + + cg, _ := f.build(t) + + require.Len(t, cg.Edges["src/main.c::main"], 1) + assert.Equal(t, "include/api.h::external_op", cg.Edges["src/main.c::main"][0]) + sites := cg.CallSites["src/main.c::main"] + require.Len(t, sites, 1) + assert.True(t, sites[0].Resolved) +} + +// TestBuildCCallGraph_StdlibCallUnresolved verifies that calls to +// functions not present in the registry (e.g. printf, malloc) are +// recorded as Resolved:false with a failure reason — they must not +// contribute an edge but must remain visible to rule writers. 
+func TestBuildCCallGraph_StdlibCallUnresolved(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addCall(t, mainFn, "printf", []string{"\"hello\""}) + + cg, _ := f.build(t) + + assert.Empty(t, cg.Edges["src/main.c::main"], "external call must not produce an edge") + sites := cg.CallSites["src/main.c::main"] + require.Len(t, sites, 1) + assert.False(t, sites[0].Resolved) + assert.Equal(t, "printf", sites[0].Target) + assert.Empty(t, sites[0].TargetFQN) + assert.NotEmpty(t, sites[0].FailureReason, "unresolved sites must record a failure reason") +} + +// TestBuildCCallGraph_RecursiveCall verifies that a function calling +// itself produces a self-edge (caller == callee FQN). +func TestBuildCCallGraph_RecursiveCall(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/util.c" + + f := newCFixture(t) + process := f.addFunction(t, mainC, "src/util.c", "process", "void", false) + f.addCall(t, process, "process", nil) + + cg, _ := f.build(t) + + assert.Equal(t, []string{"src/util.c::process"}, cg.Edges["src/util.c::process"]) + assert.Equal(t, []string{"src/util.c::process"}, cg.ReverseEdges["src/util.c::process"]) +} + +// TestBuildCCallGraph_StaticAndOtherFileSameName covers two functions +// with the same bare name in different files (e.g. file-scope statics). +// The same-file caller must bind to the local definition; the other +// file's definition must remain reachable via global lookup. 
+func TestBuildCCallGraph_StaticAndOtherFileSameName(t *testing.T) { + root := fixtureRoot + aC := root + "/src/a.c" + bC := root + "/src/b.c" + + f := newCFixture(t) + aMain := f.addFunction(t, aC, "src/a.c", "main", "int", false) + bMain := f.addFunction(t, bC, "src/b.c", "main", "int", false) + f.addFunction(t, aC, "src/a.c", "init", "void", false) + f.addFunction(t, bC, "src/b.c", "init", "void", false) + + f.addCall(t, aMain, "init", nil) + f.addCall(t, bMain, "init", nil) + + cg, _ := f.build(t) + + assert.Equal(t, []string{"src/a.c::init"}, cg.Edges["src/a.c::main"]) + assert.Equal(t, []string{"src/b.c::init"}, cg.Edges["src/b.c::main"]) +} + +// TestBuildCCallGraph_TypeEnginePopulated verifies Pass 2 populates the +// type engine with explicit return types and skips void. +func TestBuildCCallGraph_TypeEnginePopulated(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addFunction(t, mainC, "src/main.c", "do_nothing", "void", false) + + cg, engine := f.build(t) + + got := engine.GetReturnType("src/main.c::main") + require.NotNil(t, got) + assert.Equal(t, "int", got.TypeFQN) + assert.InDelta(t, 1.0, got.Confidence, 1e-6) + assert.Equal(t, "declaration", got.Source) + + assert.Nil(t, engine.GetReturnType("src/main.c::do_nothing"), "void must not be stored") + _ = cg +} + +// TestBuildCCallGraph_ParameterSymbols verifies Pass 2 records every +// named parameter as a ParameterSymbol with its declared type. 
+func TestBuildCCallGraph_ParameterSymbols(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + fn := f.addFunction(t, mainC, "src/main.c", "handle", "void", false) + fn.MethodArgumentsType = []string{"int", "const char*", "Request*"} + fn.MethodArgumentsValue = []string{"id", "name", ""} // anonymous third param dropped + + cg, _ := f.build(t) + + param := cg.Parameters["src/main.c::handle.id"] + require.NotNil(t, param) + assert.Equal(t, "id", param.Name) + assert.Equal(t, "int", param.TypeAnnotation) + assert.Equal(t, "src/main.c::handle", param.ParentFQN) + assert.Equal(t, mainC, param.File) + + name := cg.Parameters["src/main.c::handle.name"] + require.NotNil(t, name) + assert.Equal(t, "const char*", name.TypeAnnotation) + + assert.NotContains(t, cg.Parameters, "src/main.c::handle.") +} + +// TestBuildCCallGraph_DeclarationsSkippedFromTypePass verifies that +// declarations (no body) do not register return types — only definitions +// contribute to the type engine. +func TestBuildCCallGraph_DeclarationsSkippedFromTypePass(t *testing.T) { + root := fixtureRoot + utilsH := root + "/include/utils.h" + + f := newCFixture(t) + f.addFunction(t, utilsH, "include/utils.h", "compute", "int", true) + + _, engine := f.build(t) + + assert.Nil(t, engine.GetReturnType("include/utils.h::compute"), + "declarations must not pollute the return-type table") +} + +// TestBuildCCallGraph_MergeIntoUnifiedGraph confirms a built C call +// graph merges cleanly into an empty destination (the Python/Go entry +// point) with no key collisions. 
+func TestBuildCCallGraph_MergeIntoUnifiedGraph(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addFunction(t, mainC, "src/main.c", "init", "void", false) + f.addCall(t, mainFn, "init", nil) + + src, _ := f.build(t) + dst := core.NewCallGraph() + builder.MergeCallGraphs(dst, src) + + assert.Contains(t, dst.Functions, "src/main.c::main") + assert.Contains(t, dst.Functions, "src/main.c::init") + assert.Equal(t, []string{"src/main.c::init"}, dst.Edges["src/main.c::main"]) + assert.Len(t, dst.CallSites["src/main.c::main"], 1) +} + +// TestBuildCCallGraph_IgnoresNonCNodes guards the language filter: +// Python/Go function nodes must not enter the C call graph even when +// their file is registered (e.g. mixed-language project). +func TestBuildCCallGraph_IgnoresNonCNodes(t *testing.T) { + root := fixtureRoot + cFile := root + "/src/main.c" + pyFile := root + "/lib/x.py" + + f := newCFixture(t) + f.addFunction(t, cFile, "src/main.c", "main", "int", false) + + // Manually inject a Python function — addFunction would tag it as C. + f.cg.AddNode(&graph.Node{ + Type: "function_definition", + Name: "py_fn", + File: pyFile, + Language: "python", + }) + + cg, _ := f.build(t) + assert.Contains(t, cg.Functions, "src/main.c::main") + assert.NotContains(t, cg.Functions, "lib/x.py::py_fn") + assert.NotContains(t, cg.Functions, "::py_fn") +} + +// TestBuildCCallGraph_AnonymousAndMissingFiltered ensures functions +// without a name or file are skipped entirely (defensive — the parser +// should never emit them, but the builder must not panic). 
+func TestBuildCCallGraph_AnonymousAndMissingFiltered(t *testing.T) { + cg := graph.NewCodeGraph() + cg.AddNode(&graph.Node{Type: "function_definition", Name: "", File: "/x.c", Language: "c"}) + cg.AddNode(&graph.Node{Type: "function_definition", Name: "fn", File: "", Language: "c"}) + + registry := core.NewCModuleRegistry("/x") + got, err := builder.BuildCCallGraph(cg, registry, resolution.NewCTypeInferenceEngine(registry)) + require.NoError(t, err) + assert.Empty(t, got.Functions) + assert.Empty(t, got.Edges) +} + +// TestBuildCCallGraph_EmptyTargetCallSkipped covers the defensive +// branch in resolveCCallTarget: a call_expression with no target name +// (parser bug or pathological input) is recorded as an unresolved +// call site rather than ignored entirely. +func TestBuildCCallGraph_EmptyTargetCallSkipped(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + + // Direct injection: addCall would set Name; here we want it empty. + bad := &graph.Node{ + ID: "bad-call", + Type: "call_expression", + Name: "", + File: mainC, + Language: "c", + } + f.cg.AddNode(bad) + f.cg.AddEdge(mainFn, bad) + + cg, _ := f.build(t) + assert.Empty(t, cg.Edges["src/main.c::main"], "anonymous calls produce no edge") + assert.Empty(t, cg.CallSites["src/main.c::main"], "anonymous calls are not recorded as call sites") +} + +// TestBuildCCallGraph_GlobalDefinitionFromOtherFileWithoutInclude +// covers Source 2: a function defined in another .c file but called +// without an include directive present. The global FunctionIndex +// search must still find it. 
+func TestBuildCCallGraph_GlobalDefinitionFromOtherFileWithoutInclude(t *testing.T) { + root := fixtureRoot + aC := root + "/src/a.c" + bC := root + "/src/b.c" + + f := newCFixture(t) + caller := f.addFunction(t, aC, "src/a.c", "main", "int", false) + f.addFunction(t, bC, "src/b.c", "helper", "void", false) + f.addCall(t, caller, "helper", nil) + + cg, _ := f.build(t) + assert.Equal(t, []string{"src/b.c::helper"}, cg.Edges["src/a.c::main"]) +} + +// TestBuildCCallGraph_SameFileDeclarationAcceptedWhenNoDefinition +// verifies the third resolution step: when no definition exists +// project-wide, a same-file declaration is accepted. +func TestBuildCCallGraph_SameFileDeclarationAcceptedWhenNoDefinition(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + caller := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addFunction(t, mainC, "src/main.c", "extern_op", "int", true) // forward decl + f.addCall(t, caller, "extern_op", nil) + + cg, _ := f.build(t) + assert.Equal(t, []string{"src/main.c::extern_op"}, cg.Edges["src/main.c::main"]) +}