diff --git a/sast-engine/graph/callgraph/builder/c_builder.go b/sast-engine/graph/callgraph/builder/c_builder.go index 1cf58a8c..02970623 100644 --- a/sast-engine/graph/callgraph/builder/c_builder.go +++ b/sast-engine/graph/callgraph/builder/c_builder.go @@ -304,18 +304,45 @@ func resolveCCallTarget( return "", false, nil } -// lookupCStdlib walks the caller file's `#include <...>` list and asks the -// stdlib registry for a function with the requested name in each header. -// First match wins; ties are unlikely (stdlib symbols are uniquely owned by -// one header) but if they do happen, the include order in source decides. +// lookupCStdlib resolves a free function against the C stdlib registry. +// +// The lookup runs in two stages: +// +// 1. Direct system includes — walk the caller file's own `#include <…>` +// list. This is the fast path and covers the case where the calling +// file directly pulls in the header that owns the symbol. +// 2. Manifest-wide fallback — when the direct walk doesn't yield a hit, +// scan every header in the manifest. Real-world C codebases routinely +// route stdlib pulls through a project-internal "common.h" header +// (redis does this with `server.h`), so without this fallback every +// `.c` file that doesn't redundantly `#include ` itself +// would lose strlen / strcmp / memcpy / etc. +// +// First hit wins. Stdlib symbols are uniquely owned by one header in the +// real world, so first-hit-wins is effectively the same as best-match. +// The fallback is bounded to the manifest's loaded headers — symbols not +// in the manifest can never resolve, so there's no risk of binding a +// project-internal symbol to a stdlib FQN. func lookupCStdlib(callerFile, funcName string, registry *core.CModuleRegistry) (string, *core.CStdlibFunction) { - prefix, ok := registry.FileToPrefix[callerFile] - if !ok { + loader := registry.StdlibRegistry + if loader == nil { return "", nil } - for _, header := range registry.SystemIncludes[prefix] { - fn, err := registry.StdlibRegistry.GetFunction(header, funcName) - if err == nil && fn != nil { + + // Stage 1: direct system includes from the caller file. + if prefix, ok := registry.FileToPrefix[callerFile]; ok { + for _, header := range registry.SystemIncludes[prefix] { + if fn, err := loader.GetFunction(header, funcName); err == nil && fn != nil { + return fn.FQN, fn + } + } + } + + // Stage 2: transitive-include fallback. Walk every manifest header. + // O(headers) per unresolved call — ~1900 headers on Linux glibc; + // bounded and predictable. Mirrors the PR-04 C++ resolver fallback. + for _, header := range loader.ListHeaders() { + if fn, err := loader.GetFunction(header, funcName); err == nil && fn != nil { return fn.FQN, fn } } diff --git a/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go b/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go index 59747d0b..3f19007a 100644 --- a/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go +++ b/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go @@ -143,10 +143,13 @@ func TestBuildCCallGraph_StdlibFallback_NotConsultedWhenProjectDefinitionExists( assert.Empty(t, sites[0].SecurityTag, "project resolution must not pick up stdlib SecurityTag") } -// TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved verifies -// that a call to an unknown function with no matching system include -// stays unresolved — the registry must not return arbitrary symbols. -func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) { +// TestBuildCCallGraph_StdlibFallback_TransitiveIncludeResolves verifies +// the PR-05 transitive-include fallback. Real C codebases route stdlib +// pulls through a project-internal "common.h" header, so files that +// call printf without a direct `#include ` still need to +// resolve. The resolver scans every manifest header when the file's +// direct system-include list doesn't yield a hit. +func TestBuildCCallGraph_StdlibFallback_TransitiveIncludeResolves(t *testing.T) { root := fixtureRoot mainC := root + "/src/main.c" @@ -154,7 +157,9 @@ func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) f.addCall(t, mainFn, "printf", nil) - // No SystemIncludes entry for this file → stdlib lookup is a no-op. + // Notice: no SystemIncludes entry for src/main.c. Pre-PR-05 this + // stayed unresolved; with the manifest-wide fallback the resolver + // finds printf via the (only) loaded header. f.registry.StdlibRegistry = newFakeCStdlibLoader(map[string]map[string]*core.CStdlibFunction{ "stdio.h": {"printf": {FQN: "c::stdio::printf", ReturnType: "int"}}, }) @@ -163,5 +168,29 @@ func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) sites := cg.CallSites["src/main.c::main"] require.Len(t, sites, 1) - assert.False(t, sites[0].Resolved, "no matching include => stdlib must not be consulted") + assert.True(t, sites[0].Resolved, "transitive fallback must resolve printf") + assert.Equal(t, "c::stdio::printf", sites[0].TargetFQN) +} + +// TestBuildCCallGraph_StdlibFallback_UnknownSymbolStaysUnresolved is the +// safety guard: a call whose name doesn't appear anywhere in the +// manifest must not bind to an arbitrary entry. Bounds the fallback to +// "names actually in the manifest". +func TestBuildCCallGraph_StdlibFallback_UnknownSymbolStaysUnresolved(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addCall(t, mainFn, "totally_made_up_function", nil) + + f.registry.StdlibRegistry = newFakeCStdlibLoader(map[string]map[string]*core.CStdlibFunction{ + "stdio.h": {"printf": {FQN: "c::stdio::printf", ReturnType: "int"}}, + }) + + cg, _ := f.build(t) + + sites := cg.CallSites["src/main.c::main"] + require.Len(t, sites, 1) + assert.False(t, sites[0].Resolved, "unknown symbol must stay unresolved") }