Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions sast-engine/cmd/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ Examples:
}
}

buildClikeCallGraphs(cg, codeGraph, projectPath, logger)

// Step 4: Load Python SDK rules
logger.StartProgress("Loading rules", -1)
rules, err := loader.LoadRules(logger)
Expand Down Expand Up @@ -478,6 +480,72 @@ func countTotalCallSites(cg *core.CallGraph) int {
return total
}

// buildClikeCallGraphs is the entry point for C-family call-graph
// construction. For each of "c" and "cpp" (in that order) it checks
// whether codeGraph holds at least one node tagged with that Language
// and, if so, invokes the matching build-and-merge helper, which folds
// its result into cg.
//
// The helpers return nothing and report their own failures through
// logger, so a failed or skipped C build never prevents the C++ build
// from running.
//
// The gate is driven by the already-parsed CodeGraph rather than by a
// manifest file: a graph with no C or C++ nodes (for example a
// Python-only or Go-only project) skips both builders entirely.
func buildClikeCallGraphs(cg *core.CallGraph, codeGraph *graph.CodeGraph, projectPath string, logger *output.Logger) {
	type mergeFunc func(*core.CallGraph, *graph.CodeGraph, string, *output.Logger)

	// Slice (not map) so the C pipeline is always attempted before C++.
	pipelines := []struct {
		language string
		merge    mergeFunc
	}{
		{language: "c", merge: buildCCallGraphAndMerge},
		{language: "cpp", merge: buildCppCallGraphAndMerge},
	}

	for _, pipeline := range pipelines {
		if !hasLanguageNodes(codeGraph, pipeline.language) {
			continue
		}
		pipeline.merge(cg, codeGraph, projectPath, logger)
	}
}

// buildCCallGraphAndMerge builds the C call graph for codeGraph and
// merges it into cg.
//
// It creates a C module registry for projectPath, derives a C type
// inference engine from that registry, and hands both to the C
// call-graph builder. If the builder returns an error, a warning is
// logged and the function returns before any merge, so cg is left as
// it was. On success the merged function and call-site counts are
// reported through logger.Statistic.
func buildCCallGraphAndMerge(cg *core.CallGraph, codeGraph *graph.CodeGraph, projectPath string, logger *output.Logger) {
	logger.Debug("Detected C source files, building C call graph...")

	moduleRegistry := registry.BuildCModuleRegistry(projectPath, codeGraph)
	built, buildErr := builder.BuildCCallGraph(
		codeGraph,
		moduleRegistry,
		resolution.NewCTypeInferenceEngine(moduleRegistry),
	)
	if buildErr != nil {
		logger.Warning("Failed to build C call graph: %v", buildErr)
		return
	}

	builder.MergeCallGraphs(cg, built)

	// Counts are taken from the freshly built graph, after the merge.
	functionCount := len(built.Functions)
	callSiteCount := countTotalCallSites(built)
	logger.Statistic("C call graph merged: %d functions, %d call sites", functionCount, callSiteCount)
}

// buildCppCallGraphAndMerge builds the C++ call graph for codeGraph
// and merges it into cg.
//
// It creates a C++ module registry for projectPath, derives a C++ type
// inference engine from that registry, and hands both to the C++
// call-graph builder. If the builder returns an error, a warning is
// logged and the function returns before any merge, so cg is left as
// it was. On success the merged function and call-site counts are
// reported through logger.Statistic.
func buildCppCallGraphAndMerge(cg *core.CallGraph, codeGraph *graph.CodeGraph, projectPath string, logger *output.Logger) {
	logger.Debug("Detected C++ source files, building C++ call graph...")

	moduleRegistry := registry.BuildCppModuleRegistry(projectPath, codeGraph)
	built, buildErr := builder.BuildCppCallGraph(
		codeGraph,
		moduleRegistry,
		resolution.NewCppTypeInferenceEngine(moduleRegistry),
	)
	if buildErr != nil {
		logger.Warning("Failed to build C++ call graph: %v", buildErr)
		return
	}

	builder.MergeCallGraphs(cg, built)

	// Counts are taken from the freshly built graph, after the merge.
	functionCount := len(built.Functions)
	callSiteCount := countTotalCallSites(built)
	logger.Statistic("C++ call graph merged: %d functions, %d call sites", functionCount, callSiteCount)
}

// hasLanguageNodes reports whether codeGraph contains at least one
// non-nil node whose Language field equals language exactly.
//
// A nil codeGraph yields false, as does a graph in which no node
// matches. The scan stops at the first match. Callers use this to
// decide whether a per-language call-graph builder should run at all.
func hasLanguageNodes(codeGraph *graph.CodeGraph, language string) bool {
	found := false
	if codeGraph != nil {
		for _, candidate := range codeGraph.Nodes {
			// Skip nil entries and nodes of any other language.
			if candidate == nil || candidate.Language != language {
				continue
			}
			found = true
			break
		}
	}
	return found
}

// extractContainerFiles extracts unique Docker and docker-compose file paths from CodeGraph.
func extractContainerFiles(codeGraph *graph.CodeGraph) (dockerFiles []string, composeFiles []string) {
dockerFileSet := make(map[string]bool)
Expand Down
119 changes: 119 additions & 0 deletions sast-engine/cmd/scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,125 @@ func TestCountTotalCallSites(t *testing.T) {
})
}

// newTestLogger builds a debug-verbosity logger whose output is sent
// to io.Discard, so helpers under test can log freely without writing
// anything to stdout.
func newTestLogger() *output.Logger {
	sink := io.Discard
	logger := output.NewLoggerWithWriter(output.VerbosityDebug, sink)
	return logger
}

// TestBuildClikeCallGraphs_NoNodes verifies the entry helper short-
// circuits when neither C nor C++ source files appear in the graph.
// `cg` must remain untouched.
func TestBuildClikeCallGraphs_NoNodes(t *testing.T) {
cg := core.NewCallGraph()
codeGraph := graph.NewCodeGraph()
codeGraph.AddNode(&graph.Node{ID: "py-1", Language: "python", Type: "function_definition", Name: "f"})

buildClikeCallGraphs(cg, codeGraph, "/projects/app", newTestLogger())

assert.Empty(t, cg.Functions, "no C/C++ nodes => no merge")
}

// TestBuildClikeCallGraphs_CFunctionsMerged builds a one-node C
// CodeGraph (a `main` function in src/main.c) and checks that, after
// the helper runs, the destination call graph contains that function
// under its project-relative key.
func TestBuildClikeCallGraphs_CFunctionsMerged(t *testing.T) {
	const root = "/projects/app"

	mainFn := &graph.Node{
		ID:         "fn:src/main.c::main",
		Type:       "function_definition",
		Name:       "main",
		File:       root + "/src/main.c",
		Language:   "c",
		ReturnType: "int",
	}
	codeGraph := graph.NewCodeGraph()
	codeGraph.AddNode(mainFn)

	dest := core.NewCallGraph()
	buildClikeCallGraphs(dest, codeGraph, root, newTestLogger())

	assert.Contains(t, dest.Functions, "src/main.c::main")
}

// TestBuildClikeCallGraphs_CppFunctionsMerged builds a one-node C++
// CodeGraph and checks that the function lands in the destination
// call graph under its .cpp key, and that no entry appears under the
// corresponding .c key.
func TestBuildClikeCallGraphs_CppFunctionsMerged(t *testing.T) {
	const root = "/projects/app"

	mainFn := &graph.Node{
		ID:         "fn:src/main.cpp::main",
		Type:       "function_definition",
		Name:       "main",
		File:       root + "/src/main.cpp",
		Language:   "cpp",
		ReturnType: "int",
		SourceLocation: &graph.SourceLocation{
			File:      root + "/src/main.cpp",
			StartByte: 0,
			EndByte:   30,
		},
	}
	codeGraph := graph.NewCodeGraph()
	codeGraph.AddNode(mainFn)

	dest := core.NewCallGraph()
	buildClikeCallGraphs(dest, codeGraph, root, newTestLogger())

	assert.Contains(t, dest.Functions, "src/main.cpp::main")
	assert.NotContains(t, dest.Functions, "src/main.c::main", "C++ node must not appear in C namespace")
}

// TestBuildClikeCallGraphs_MixedProject puts one C function and one
// C++ function into the same CodeGraph and checks that both end up in
// the single destination call graph after the helper runs.
func TestBuildClikeCallGraphs_MixedProject(t *testing.T) {
	const root = "/projects/app"

	cFn := &graph.Node{
		ID:       "c-fn",
		Type:     "function_definition",
		Name:     "c_main",
		File:     root + "/src/main.c",
		Language: "c",
	}
	cppFn := &graph.Node{
		ID:       "cpp-fn",
		Type:     "function_definition",
		Name:     "cpp_main",
		File:     root + "/src/main.cpp",
		Language: "cpp",
		SourceLocation: &graph.SourceLocation{
			File:      root + "/src/main.cpp",
			StartByte: 0,
			EndByte:   30,
		},
	}

	codeGraph := graph.NewCodeGraph()
	codeGraph.AddNode(cFn)
	codeGraph.AddNode(cppFn)

	dest := core.NewCallGraph()
	buildClikeCallGraphs(dest, codeGraph, root, newTestLogger())

	assert.Contains(t, dest.Functions, "src/main.c::c_main")
	assert.Contains(t, dest.Functions, "src/main.cpp::cpp_main")
}

// TestHasLanguageNodes covers the per-language gate that decides
// whether scan.go runs the C / C++ call-graph builders. The gate must
// be false for a nil graph, false when no nodes match, and true as
// soon as a single node carries the requested Language tag.
func TestHasLanguageNodes(t *testing.T) {
t.Run("nil graph returns false", func(t *testing.T) {
assert.False(t, hasLanguageNodes(nil, "c"))
})

t.Run("empty graph returns false", func(t *testing.T) {
assert.False(t, hasLanguageNodes(graph.NewCodeGraph(), "c"))
})

t.Run("returns false when no node matches", func(t *testing.T) {
cg := graph.NewCodeGraph()
cg.AddNode(&graph.Node{ID: "py-1", Language: "python"})
cg.AddNode(&graph.Node{ID: "go-1", Language: "go"})
assert.False(t, hasLanguageNodes(cg, "c"))
assert.False(t, hasLanguageNodes(cg, "cpp"))
})

t.Run("returns true on first matching node", func(t *testing.T) {
cg := graph.NewCodeGraph()
cg.AddNode(&graph.Node{ID: "py-1", Language: "python"})
cg.AddNode(&graph.Node{ID: "c-1", Language: "c"})
cg.AddNode(&graph.Node{ID: "cpp-1", Language: "cpp"})
assert.True(t, hasLanguageNodes(cg, "c"))
assert.True(t, hasLanguageNodes(cg, "cpp"))
assert.True(t, hasLanguageNodes(cg, "python"))
assert.False(t, hasLanguageNodes(cg, "rust"))
})
}

func TestPrintDetections(t *testing.T) {
t.Run("prints detections with all fields", func(t *testing.T) {
// Capture stdout
Expand Down
120 changes: 120 additions & 0 deletions sast-engine/graph/callgraph/cfg/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ func (b *cfgBuilder) processBody(bodyNode *sitter.Node, currentBlockID string) s
case "with_statement":
currentBlockID = b.processWith(actualNode, stmtNode, currentBlockID)

case "switch_statement":
currentBlockID = b.processSwitch(actualNode, stmtNode, currentBlockID)

case "do_statement":
currentBlockID = b.processDoWhile(actualNode, stmtNode, currentBlockID)

case "return_statement":
stmt := b.extractStatement(actualNode, stmtNode)
if stmt != nil {
Expand Down Expand Up @@ -452,6 +458,120 @@ func (b *cfgBuilder) processTry(tryNode, _ *sitter.Node, predBlockID string) str
return mergeBlockID
}

// processSwitch builds the CFG fragment for a C/C++ switch statement
// and returns the ID of the block that follows it.
//
// Blocks and edges created, in order:
//
//   - a "switch_header" block (BlockTypeSwitch) reached from
//     predBlockID; when the switch has a condition, the header records
//     one statement whose Uses are the identifiers in that condition
//     and whose line is the first line of stmtNode;
//   - a "switch_after" merge block;
//   - one "case" block per `case_statement` named child of the body,
//     each reached from the header. The case node is handed to
//     processBody, and the block ID it returns is treated as that
//     case's exit.
//
// Fallthrough is the default model: when the previous case produced a
// non-empty exit ID, an edge is added from that exit to the next case
// block, and the last case's exit is linked to the merge block. An
// empty exit ID adds no such edge (presumably the path terminated
// inside the case; processBody is not visible here, so confirm).
//
// `break` is not lifted into an edge by this function; how it is
// recorded depends on processBody.
//
// The header is always linked directly to the merge block, whether or
// not a default case exists, so the "no case matched" path is always
// represented.
//
//	[pred] ──> [header] ──> [case 1] ──> [case 2] ──> ... ──> [after]
//	               ├──────────────────────^                      ^
//	               └─────────────────────────────────────────────┘
func (b *cfgBuilder) processSwitch(switchNode, stmtNode *sitter.Node, predBlockID string) string {
	header := b.newBlockID("switch_header")
	b.addBlock(header, BlockTypeSwitch)
	b.cfGraph.AddEdge(predBlockID, header)

	cond := switchNode.ChildByFieldName("condition")
	if cond != nil {
		condStmt := &core.Statement{
			Type:       core.StatementTypeIf,
			LineNumber: stmtNode.StartPoint().Row + 1,
			Uses:       extractIdentifiers(cond, b.sourceCode),
		}
		b.appendStmt(header, condStmt)
	}

	// The merge block is allocated before any case block so block IDs
	// are handed out in a stable header → after → cases order.
	after := b.newBlockID("switch_after")
	b.addBlock(after, BlockTypeNormal)

	if body := switchNode.ChildByFieldName("body"); body != nil {
		lastCaseExit := ""
		for idx := 0; idx < int(body.NamedChildCount()); idx++ {
			entry := body.NamedChild(idx)
			if entry == nil {
				continue
			}
			if entry.Type() != "case_statement" {
				continue
			}

			caseBlock := b.newBlockID("case")
			b.addBlock(caseBlock, BlockTypeNormal)
			b.cfGraph.AddEdge(header, caseBlock)

			// Fallthrough from the preceding case, when it has a live exit.
			if lastCaseExit != "" {
				b.cfGraph.AddEdge(lastCaseExit, caseBlock)
			}
			lastCaseExit = b.processBody(entry, caseBlock)
		}

		// The final case flows out of the switch.
		if lastCaseExit != "" {
			b.cfGraph.AddEdge(lastCaseExit, after)
		}
	}

	// The header can bypass every case (including a missing body).
	b.cfGraph.AddEdge(header, after)

	return after
}

// processDoWhile builds the CFG fragment for a C/C++ do-while loop and
// returns the ID of the block that follows it.
//
// The condition is evaluated after the body, so the predecessor flows
// straight into the body with no gate in front of it:
//
//	[pred] ──> [do_body] ──> [do_cond] ──true───> [do_body]
//	                         [do_cond] ──false──> [do_after]
//
// Blocks and edges created, in order:
//
//   - a "do_body" block reached from predBlockID; when the loop has a
//     body node it is handed to processBody, and the returned block ID
//     is treated as the body's exit (otherwise the body block itself is
//     the exit);
//   - a "do_cond" block (BlockTypeLoop), reached from the body's exit
//     only when that exit ID is non-empty; when the loop has a
//     condition, the block records one statement whose Uses are the
//     identifiers in the condition and whose line is the first line of
//     stmtNode;
//   - a back edge from the condition block to the body block;
//   - a "do_after" block reached from the condition block.
func (b *cfgBuilder) processDoWhile(doNode, stmtNode *sitter.Node, predBlockID string) string {
	loopBody := b.newBlockID("do_body")
	b.addBlock(loopBody, BlockTypeNormal)
	b.cfGraph.AddEdge(predBlockID, loopBody)

	// With no body node the body block is its own exit.
	bodyExit := loopBody
	if body := doNode.ChildByFieldName("body"); body != nil {
		bodyExit = b.processBody(body, loopBody)
	}

	loopCond := b.newBlockID("do_cond")
	b.addBlock(loopCond, BlockTypeLoop)
	if bodyExit != "" {
		b.cfGraph.AddEdge(bodyExit, loopCond)
	}

	cond := doNode.ChildByFieldName("condition")
	if cond != nil {
		condStmt := &core.Statement{
			Type:       core.StatementTypeWhile,
			LineNumber: stmtNode.StartPoint().Row + 1,
			Uses:       extractIdentifiers(cond, b.sourceCode),
		}
		b.appendStmt(loopCond, condStmt)
	}

	// Condition true: run the body again.
	b.cfGraph.AddEdge(loopCond, loopBody)

	// Condition false: leave the loop.
	after := b.newBlockID("do_after")
	b.addBlock(after, BlockTypeNormal)
	b.cfGraph.AddEdge(loopCond, after)

	return after
}

// processWith handles with-statements.
// Creates a block with the context variable def, then processes the body.
func (b *cfgBuilder) processWith(withNode, stmtNode *sitter.Node, predBlockID string) string {
Expand Down
Loading
Loading