Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 36 additions & 9 deletions sast-engine/graph/callgraph/builder/c_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,18 +304,45 @@ func resolveCCallTarget(
return "", false, nil
}

// lookupCStdlib walks the caller file's `#include <...>` list and asks the
// stdlib registry for a function with the requested name in each header.
// First match wins; ties are unlikely (stdlib symbols are uniquely owned by
// one header) but if they do happen, the include order in source decides.
// lookupCStdlib resolves a free function against the C stdlib registry.
//
// The lookup runs in two stages:
//
// 1. Direct system includes — walk the caller file's own `#include <…>`
// list. This is the fast path and covers the case where the calling
// file directly pulls in the header that owns the symbol.
// 2. Manifest-wide fallback — when the direct walk doesn't yield a hit,
// scan every header in the manifest. Real-world C codebases routinely
// route stdlib pulls through a project-internal "common.h" header
// (redis does this with `server.h`), so without this fallback every
// `.c` file that doesn't redundantly `#include <string.h>` itself
// would lose strlen / strcmp / memcpy / etc.
//
// First hit wins. Stdlib symbols are uniquely owned by one header in the
// real world, so first-hit-wins is effectively the same as best-match.
// The fallback is bounded to the manifest's loaded headers — a name that
// appears in no manifest header can never resolve here, so a
// project-internal symbol can bind to a stdlib FQN only if it shares its
// name with a manifest entry.
func lookupCStdlib(callerFile, funcName string, registry *core.CModuleRegistry) (string, *core.CStdlibFunction) {
prefix, ok := registry.FileToPrefix[callerFile]
if !ok {
loader := registry.StdlibRegistry
if loader == nil {
return "", nil
}
for _, header := range registry.SystemIncludes[prefix] {
fn, err := registry.StdlibRegistry.GetFunction(header, funcName)
if err == nil && fn != nil {

// Stage 1: direct system includes from the caller file.
if prefix, ok := registry.FileToPrefix[callerFile]; ok {
for _, header := range registry.SystemIncludes[prefix] {
if fn, err := loader.GetFunction(header, funcName); err == nil && fn != nil {
return fn.FQN, fn
}
}
}

// Stage 2: transitive-include fallback. Walk every manifest header.
// O(headers) per unresolved call — ~1900 headers on Linux glibc;
// bounded and predictable. Mirrors the PR-04 C++ resolver fallback.
for _, header := range loader.ListHeaders() {
if fn, err := loader.GetFunction(header, funcName); err == nil && fn != nil {
return fn.FQN, fn
}
}
Expand Down
41 changes: 35 additions & 6 deletions sast-engine/graph/callgraph/builder/c_builder_stdlib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,18 +143,23 @@ func TestBuildCCallGraph_StdlibFallback_NotConsultedWhenProjectDefinitionExists(
assert.Empty(t, sites[0].SecurityTag, "project resolution must not pick up stdlib SecurityTag")
}

// TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved verifies
// that a call to an unknown function with no matching system include
// stays unresolved — the registry must not return arbitrary symbols.
func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T) {
// TestBuildCCallGraph_StdlibFallback_TransitiveIncludeResolves verifies
// the PR-05 transitive-include fallback. Real C codebases route stdlib
// pulls through a project-internal "common.h" header, so files that
// call printf without a direct `#include <stdio.h>` still need to
// resolve. The resolver scans every manifest header when the file's
// direct system-include list doesn't yield a hit.
func TestBuildCCallGraph_StdlibFallback_TransitiveIncludeResolves(t *testing.T) {
root := fixtureRoot
mainC := root + "/src/main.c"

f := newCFixture(t)
mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false)
f.addCall(t, mainFn, "printf", nil)

// No SystemIncludes entry for this file → stdlib lookup is a no-op.
// Notice: no SystemIncludes entry for src/main.c. Pre-PR-05 this
// stayed unresolved; with the manifest-wide fallback the resolver
// finds printf via the (only) loaded header.
f.registry.StdlibRegistry = newFakeCStdlibLoader(map[string]map[string]*core.CStdlibFunction{
"stdio.h": {"printf": {FQN: "c::stdio::printf", ReturnType: "int"}},
})
Expand All @@ -163,5 +168,29 @@ func TestBuildCCallGraph_StdlibFallback_NoIncludesLeavesUnresolved(t *testing.T)

sites := cg.CallSites["src/main.c::main"]
require.Len(t, sites, 1)
assert.False(t, sites[0].Resolved, "no matching include => stdlib must not be consulted")
assert.True(t, sites[0].Resolved, "transitive fallback must resolve printf")
assert.Equal(t, "c::stdio::printf", sites[0].TargetFQN)
}

// TestBuildCCallGraph_StdlibFallback_UnknownSymbolStaysUnresolved is the
// safety guard: a call whose name doesn't appear anywhere in the
// manifest must not bind to an arbitrary entry. Bounds the fallback to
// "names actually in the manifest".
func TestBuildCCallGraph_StdlibFallback_UnknownSymbolStaysUnresolved(t *testing.T) {
root := fixtureRoot
mainC := root + "/src/main.c"

f := newCFixture(t)
mainFn := f.addFunction(t, mainC, "src/main.c", "main", "int", false)
f.addCall(t, mainFn, "totally_made_up_function", nil)

f.registry.StdlibRegistry = newFakeCStdlibLoader(map[string]map[string]*core.CStdlibFunction{
"stdio.h": {"printf": {FQN: "c::stdio::printf", ReturnType: "int"}},
})

cg, _ := f.build(t)

sites := cg.CallSites["src/main.c::main"]
require.Len(t, sites, 1)
assert.False(t, sites[0].Resolved, "unknown symbol must stay unresolved")
}
Loading