From 8f4b90a9080978e60c79628c6359881f0b73cfa6 Mon Sep 17 00:00:00 2001 From: shivasurya Date: Sun, 3 May 2026 20:46:05 -0400 Subject: [PATCH] feat(builder): transitive-include fallback for C stdlib calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the PR-04 C++ fix on the C resolver. lookupCStdlib now runs in two stages: 1. Direct system includes (existing fast path). 2. Manifest-wide scan when the direct walk doesn't yield a hit. Real C codebases route stdlib pulls through a project-internal header (redis includes , , , from src/server.h, and every .c file just `#include "server.h"`). PR-02's direct-only walk meant strlen/memcpy/printf/malloc and friends never resolved when called from any file relying on that pattern. The fallback is bounded to the manifest's loaded headers — symbols not in the manifest can never bind, so there's no risk of a project-internal function accidentally resolving to a stdlib FQN. First hit wins; stdlib symbols are uniquely owned by one header in the real world so order doesn't affect correctness. The same path is exercised by C++ files that route stdlib calls through the C-fallthrough (printf/malloc/memcpy from .cpp), so the fix benefits both languages. Validation: - Redis full tree: 63.1% → 67.8% (+4.7 pp) - Proxygen tree: 17.1% → 25.4% (+8.3 pp, via C-fallthrough) - Smoke fixture: 100% → 100% (regression check) The PR-04 test that asserted "no direct include => unresolved" contradicts the new (intentional) contract; replaced with two tests: - transitive resolution works - unknown symbols stay unresolved (the safety guard) Co-Authored-By: Claude Sonnet 4.5 --- .../graph/callgraph/builder/c_builder.go | 45 +++++++++++++++---- .../builder/c_builder_stdlib_test.go | 41 ++++++++++++++--- 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/sast-engine/graph/callgraph/builder/c_builder.go b/sast-engine/graph/callgraph/builder/c_builder.go index 1cf58a8c..02970623 100644 --- a/sast-engine/graph/callgraph/builder/c_builder.go +++ b/sast-engine/graph/callgraph/builder/c_builder.go @@ -304,18 +304,45 @@ func resolveCCallTarget( return "", false, nil } -// lookupCStdlib walks the caller file's `#include <...>` list and asks the -// stdlib registry for a function with the requested name in each header. -// First match wins; ties are unlikely (stdlib symbols are uniquely owned by -// one header) but if they do happen, the include order in source decides. +// lookupCStdlib resolves a free function against the C stdlib registry. +// +// The lookup runs in two stages: +// +// 1. Direct system includes — walk the caller file's own `#include <…>` +// list. This is the fast path and covers the case where the calling +// file directly pulls in the header that owns the symbol. +// 2. Manifest-wide fallback — when the direct walk doesn't yield a hit, +// scan every header in the manifest. Real-world C codebases routinely +// route stdlib pulls through a project-internal "common.h" header +// (redis does this with `server.h`), so without this fallback every +// `.c` file that doesn't redundantly `#include ` itself +// would lose strlen / strcmp / memcpy / etc. +// +// First hit wins. Stdlib symbols are uniquely owned by one header in the +// real world, so first-hit-wins is effectively the same as best-match. +// The fallback is bounded to the manifest's loaded headers — symbols not +// in the manifest can never resolve, so there's no risk of binding a +// project-internal symbol to a stdlib FQN. func lookupCStdlib(callerFile, funcName string, registry *core.CModuleRegistry) (string, *core.CStdlibFunction) { - prefix, ok := registry.FileToPrefix[callerFile] - if !ok { + loader := registry.StdlibRegistry + if loader == nil { return "", nil } - for _, header := range registry.SystemIncludes[prefix] { - fn, err := registry.StdlibRegistry.GetFunction(header, funcName) - if err == nil && fn != nil { + + // Stage 1: direct system includes from the caller file. + if prefix, ok := registry.FileToPrefix[callerFile]; ok { + for _, header := range registry.SystemIncludes[prefix] { + if fn, err := loader.GetFunction(header, funcName); err == nil && fn != nil { + return fn.FQN, fn + } + } + } + + // Stage 2: transitive-include fallback. Walk every manifest header. + // O(headers) per unresolved call — ~1900 headers on Linux glibc; + // bounded and predictable. Mirrors the PR-04 C++ resolver fallback. + for _, header := range loader.ListHeaders() { + if fn, err := loader.GetFunction(header, funcName); err == nil && fn != nil { return fn.FQN, fn } } diff --git a/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go b/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go index 59747d0b..3f19007a 100644 --- a/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go +++ b/sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go @@ -143,10 +143,13 @@ func TestBuildCCallGraph_StdlibFallback_NotConsultedWhenProjectDefinitionExists( assert.Empty(t, sites[0].SecurityTag, "project resolution must not pick up stdlib SecurityTag") } -// TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved verifies -// that a call to an unknown function with no matching system include -// stays unresolved — the registry must not return arbitrary symbols. -func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) { +// TestBuildCCallGraph_StdlibFallback_TransitiveIncludeResolves verifies +// the PR-05 transitive-include fallback. Real C codebases route stdlib +// pulls through a project-internal "common.h" header, so files that +// call printf without a direct `#include ` still need to +// resolve. The resolver scans every manifest header when the file's +// direct system-include list doesn't yield a hit. +func TestBuildCCallGraph_StdlibFallback_TransitiveIncludeResolves(t *testing.T) { root := fixtureRoot mainC := root + "/src/main.c" @@ -154,7 +157,9 @@ func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) f.addCall(t, mainFn, "printf", nil) - // No SystemIncludes entry for this file → stdlib lookup is a no-op. + // Notice: no SystemIncludes entry for src/main.c. Pre-PR-05 this + // stayed unresolved; with the manifest-wide fallback the resolver + // finds printf via the (only) loaded header. f.registry.StdlibRegistry = newFakeCStdlibLoader(map[string]map[string]*core.CStdlibFunction{ "stdio.h": {"printf": {FQN: "c::stdio::printf", ReturnType: "int"}}, }) @@ -163,5 +168,29 @@ func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) sites := cg.CallSites["src/main.c::main"] require.Len(t, sites, 1) - assert.False(t, sites[0].Resolved, "no matching include => stdlib must not be consulted") + assert.True(t, sites[0].Resolved, "transitive fallback must resolve printf") + assert.Equal(t, "c::stdio::printf", sites[0].TargetFQN) +} + +// TestBuildCCallGraph_StdlibFallback_UnknownSymbolStaysUnresolved is the +// safety guard: a call whose name doesn't appear anywhere in the +// manifest must not bind to an arbitrary entry. Bounds the fallback to +// "names actually in the manifest". +func TestBuildCCallGraph_StdlibFallback_UnknownSymbolStaysUnresolved(t *testing.T) { + root := fixtureRoot + mainC := root + "/src/main.c" + + f := newCFixture(t) + mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false) + f.addCall(t, mainFn, "totally_made_up_function", nil) + + f.registry.StdlibRegistry = newFakeCStdlibLoader(map[string]map[string]*core.CStdlibFunction{ + "stdio.h": {"printf": {FQN: "c::stdio::printf", ReturnType: "int"}}, + }) + + cg, _ := f.build(t) + + sites := cg.CallSites["src/main.c::main"] + require.Len(t, sites, 1) + assert.False(t, sites[0].Resolved, "unknown symbol must stay unresolved") }