diff --git a/sast-engine/cmd/scan.go b/sast-engine/cmd/scan.go index 7d50d68c..ae8dfb7a 100644 --- a/sast-engine/cmd/scan.go +++ b/sast-engine/cmd/scan.go @@ -288,6 +288,8 @@ Examples: } } + buildClikeCallGraphs(cg, codeGraph, projectPath, logger) + // Step 4: Load Python SDK rules logger.StartProgress("Loading rules", -1) rules, err := loader.LoadRules(logger) @@ -478,6 +480,72 @@ func countTotalCallSites(cg *core.CallGraph) int { return total } +// buildClikeCallGraphs runs the C and C++ call-graph builders against +// codeGraph (when those languages are present) and merges the results +// into cg. Each builder is independent: a failure or skip on one +// language never blocks the other. +// +// Unlike Go (which checks `go.mod` up front), C/C++ has no single +// manifest file. We instead look at the already-parsed CodeGraph for +// nodes tagged with the right `Language` so the builder skips the +// work entirely on Python-only or Go-only projects. +func buildClikeCallGraphs(cg *core.CallGraph, codeGraph *graph.CodeGraph, projectPath string, logger *output.Logger) { + if hasLanguageNodes(codeGraph, "c") { + buildCCallGraphAndMerge(cg, codeGraph, projectPath, logger) + } + if hasLanguageNodes(codeGraph, "cpp") { + buildCppCallGraphAndMerge(cg, codeGraph, projectPath, logger) + } +} + +// buildCCallGraphAndMerge constructs the C call graph and merges it +// into cg. Build failures emit a warning and leave cg untouched. +func buildCCallGraphAndMerge(cg *core.CallGraph, codeGraph *graph.CodeGraph, projectPath string, logger *output.Logger) { + logger.Debug("Detected C source files, building C call graph...") + cRegistry := registry.BuildCModuleRegistry(projectPath, codeGraph) + cTypeEngine := resolution.NewCTypeInferenceEngine(cRegistry) + cCG, err := builder.BuildCCallGraph(codeGraph, cRegistry, cTypeEngine) + if err != nil { + logger.Warning("Failed to build C call graph: %v", err) + return + } + builder.MergeCallGraphs(cg, cCG) + logger.Statistic("C call graph merged: %d functions, %d call sites", + len(cCG.Functions), countTotalCallSites(cCG)) +} + +// buildCppCallGraphAndMerge constructs the C++ call graph and merges +// it into cg. Build failures emit a warning and leave cg untouched. +func buildCppCallGraphAndMerge(cg *core.CallGraph, codeGraph *graph.CodeGraph, projectPath string, logger *output.Logger) { + logger.Debug("Detected C++ source files, building C++ call graph...") + cppRegistry := registry.BuildCppModuleRegistry(projectPath, codeGraph) + cppTypeEngine := resolution.NewCppTypeInferenceEngine(cppRegistry) + cppCG, err := builder.BuildCppCallGraph(codeGraph, cppRegistry, cppTypeEngine) + if err != nil { + logger.Warning("Failed to build C++ call graph: %v", err) + return + } + builder.MergeCallGraphs(cg, cppCG) + logger.Statistic("C++ call graph merged: %d functions, %d call sites", + len(cppCG.Functions), countTotalCallSites(cppCG)) +} + +// hasLanguageNodes reports whether codeGraph contains at least one +// node tagged with the given Language. Used to gate per-language call +// graph builders so we skip the work when no source files of that +// language were parsed. +func hasLanguageNodes(codeGraph *graph.CodeGraph, language string) bool { + if codeGraph == nil { + return false + } + for _, node := range codeGraph.Nodes { + if node != nil && node.Language == language { + return true + } + } + return false +} + // extractContainerFiles extracts unique Docker and docker-compose file paths from CodeGraph. func extractContainerFiles(codeGraph *graph.CodeGraph) (dockerFiles []string, composeFiles []string) { dockerFileSet := make(map[string]bool) diff --git a/sast-engine/cmd/scan_test.go b/sast-engine/cmd/scan_test.go index 8576d4e6..d9e9aaac 100644 --- a/sast-engine/cmd/scan_test.go +++ b/sast-engine/cmd/scan_test.go @@ -57,6 +57,125 @@ func TestCountTotalCallSites(t *testing.T) { }) } +// newTestLogger returns a logger that writes to a discard buffer so +// tests do not pollute stdout. Using NewLoggerWithWriter avoids any +// dependency on the environment's terminal detection. +func newTestLogger() *output.Logger { + return output.NewLoggerWithWriter(output.VerbosityDebug, io.Discard) +} + +// TestBuildClikeCallGraphs_NoNodes verifies the entry helper short- +// circuits when neither C nor C++ source files appear in the graph. +// `cg` must remain untouched. +func TestBuildClikeCallGraphs_NoNodes(t *testing.T) { + cg := core.NewCallGraph() + codeGraph := graph.NewCodeGraph() + codeGraph.AddNode(&graph.Node{ID: "py-1", Language: "python", Type: "function_definition", Name: "f"}) + + buildClikeCallGraphs(cg, codeGraph, "/projects/app", newTestLogger()) + + assert.Empty(t, cg.Functions, "no C/C++ nodes => no merge") +} + +// TestBuildClikeCallGraphs_CFunctionsMerged constructs a tiny C +// CodeGraph and verifies the helper indexes the function and merges +// it into the destination graph. +func TestBuildClikeCallGraphs_CFunctionsMerged(t *testing.T) { + root := "/projects/app" + codeGraph := graph.NewCodeGraph() + codeGraph.AddNode(&graph.Node{ + ID: "fn:src/main.c::main", + Type: "function_definition", + Name: "main", + File: root + "/src/main.c", + Language: "c", + ReturnType: "int", + }) + + cg := core.NewCallGraph() + buildClikeCallGraphs(cg, codeGraph, root, newTestLogger()) + + assert.Contains(t, cg.Functions, "src/main.c::main") +} + +// TestBuildClikeCallGraphs_CppFunctionsMerged verifies C++ nodes flow +// through the helper without being misclassified as C. +func TestBuildClikeCallGraphs_CppFunctionsMerged(t *testing.T) { + root := "/projects/app" + codeGraph := graph.NewCodeGraph() + codeGraph.AddNode(&graph.Node{ + ID: "fn:src/main.cpp::main", + Type: "function_definition", + Name: "main", + File: root + "/src/main.cpp", + Language: "cpp", + ReturnType: "int", + SourceLocation: &graph.SourceLocation{File: root + "/src/main.cpp", StartByte: 0, EndByte: 30}, + }) + + cg := core.NewCallGraph() + buildClikeCallGraphs(cg, codeGraph, root, newTestLogger()) + + assert.Contains(t, cg.Functions, "src/main.cpp::main") + assert.NotContains(t, cg.Functions, "src/main.c::main", "C++ node must not appear in C namespace") +} + +// TestBuildClikeCallGraphs_MixedProject confirms that a graph +// containing both C and C++ nodes produces both call graphs and +// merges them into the same destination. +func TestBuildClikeCallGraphs_MixedProject(t *testing.T) { + root := "/projects/app" + codeGraph := graph.NewCodeGraph() + codeGraph.AddNode(&graph.Node{ + ID: "c-fn", Type: "function_definition", Name: "c_main", + File: root + "/src/main.c", Language: "c", + }) + codeGraph.AddNode(&graph.Node{ + ID: "cpp-fn", Type: "function_definition", Name: "cpp_main", + File: root + "/src/main.cpp", Language: "cpp", + SourceLocation: &graph.SourceLocation{File: root + "/src/main.cpp", StartByte: 0, EndByte: 30}, + }) + + cg := core.NewCallGraph() + buildClikeCallGraphs(cg, codeGraph, root, newTestLogger()) + + assert.Contains(t, cg.Functions, "src/main.c::c_main") + assert.Contains(t, cg.Functions, "src/main.cpp::cpp_main") +} + +// TestHasLanguageNodes covers the per-language gate that decides +// whether scan.go runs the C / C++ call-graph builders. The gate must +// be false for a nil graph, false when no nodes match, and true as +// soon as a single node carries the requested Language tag. +func TestHasLanguageNodes(t *testing.T) { + t.Run("nil graph returns false", func(t *testing.T) { + assert.False(t, hasLanguageNodes(nil, "c")) + }) + + t.Run("empty graph returns false", func(t *testing.T) { + assert.False(t, hasLanguageNodes(graph.NewCodeGraph(), "c")) + }) + + t.Run("returns false when no node matches", func(t *testing.T) { + cg := graph.NewCodeGraph() + cg.AddNode(&graph.Node{ID: "py-1", Language: "python"}) + cg.AddNode(&graph.Node{ID: "go-1", Language: "go"}) + assert.False(t, hasLanguageNodes(cg, "c")) + assert.False(t, hasLanguageNodes(cg, "cpp")) + }) + + t.Run("returns true on first matching node", func(t *testing.T) { + cg := graph.NewCodeGraph() + cg.AddNode(&graph.Node{ID: "py-1", Language: "python"}) + cg.AddNode(&graph.Node{ID: "c-1", Language: "c"}) + cg.AddNode(&graph.Node{ID: "cpp-1", Language: "cpp"}) + assert.True(t, hasLanguageNodes(cg, "c")) + assert.True(t, hasLanguageNodes(cg, "cpp")) + assert.True(t, hasLanguageNodes(cg, "python")) + assert.False(t, hasLanguageNodes(cg, "rust")) + }) +} + func TestPrintDetections(t *testing.T) { t.Run("prints detections with all fields", func(t *testing.T) { // Capture stdout diff --git a/sast-engine/graph/callgraph/cfg/builder.go b/sast-engine/graph/callgraph/cfg/builder.go index 989e6d82..83b3d4ed 100644 --- a/sast-engine/graph/callgraph/cfg/builder.go +++ b/sast-engine/graph/callgraph/cfg/builder.go @@ -112,6 +112,12 @@ func (b *cfgBuilder) processBody(bodyNode *sitter.Node, currentBlockID string) s case "with_statement": currentBlockID = b.processWith(actualNode, stmtNode, currentBlockID) + case "switch_statement": + currentBlockID = b.processSwitch(actualNode, stmtNode, currentBlockID) + + case "do_statement": + currentBlockID = b.processDoWhile(actualNode, stmtNode, currentBlockID) + case "return_statement": stmt := b.extractStatement(actualNode, stmtNode) if stmt != nil { @@ -452,6 +458,120 @@ func (b *cfgBuilder) processTry(tryNode, _ *sitter.Node, predBlockID string) str return mergeBlockID } +// processSwitch handles C/C++ switch statements. +// +// A switch produces a fan-out/fan-in CFG: a header block evaluates the +// condition and branches to one block per case (and optionally a +// default block); each case body falls through to the next case unless +// it ends with a break, in which case the edge goes to the merge block. +// +// CFG shape for `switch (x) { case 1: ...; case 2: ...; default: ... }`: +// +// [header: cond=x] +// ├──> [case_1] ──> [body_1] ──fallthrough/break──> [body_2] / [merge] +// ├──> [case_2] ──> [body_2] ──fallthrough/break──> [default_body] / [merge] +// └──> [default] ─> [body_default] ────────────────> [merge] +// +// Phase 1 keeps the model simple: every case head connects to its +// predecessor case body so fallthrough is the default; explicit `break` +// statements are emitted as ordinary statements inside each body and +// will be lifted into edges by a later refactor. The header's +// successors-only fan-out captures the most important property — +// reachability of every case — for current dataflow consumers. +func (b *cfgBuilder) processSwitch(switchNode, stmtNode *sitter.Node, predBlockID string) string { + headerBlockID := b.newBlockID("switch_header") + b.addBlock(headerBlockID, BlockTypeSwitch) + b.cfGraph.AddEdge(predBlockID, headerBlockID) + + if condNode := switchNode.ChildByFieldName("condition"); condNode != nil { + b.appendStmt(headerBlockID, &core.Statement{ + Type: core.StatementTypeIf, + LineNumber: stmtNode.StartPoint().Row + 1, + Uses: extractIdentifiers(condNode, b.sourceCode), + }) + } + + mergeBlockID := b.newBlockID("switch_after") + b.addBlock(mergeBlockID, BlockTypeNormal) + + bodyNode := switchNode.ChildByFieldName("body") + if bodyNode == nil { + // Empty switch — header falls straight through to merge. + b.cfGraph.AddEdge(headerBlockID, mergeBlockID) + return mergeBlockID + } + + var prevCaseEnd string + for i := 0; i < int(bodyNode.NamedChildCount()); i++ { + child := bodyNode.NamedChild(i) + if child == nil || child.Type() != "case_statement" { + continue + } + caseBlockID := b.newBlockID("case") + b.addBlock(caseBlockID, BlockTypeNormal) + b.cfGraph.AddEdge(headerBlockID, caseBlockID) + if prevCaseEnd != "" { + // Fallthrough from the previous case. + b.cfGraph.AddEdge(prevCaseEnd, caseBlockID) + } + prevCaseEnd = b.processBody(child, caseBlockID) + } + + // Last case (or empty body) flows into the merge block. Without a + // trailing default, the header itself can also bypass every case + // (e.g. condition matches none) — represent that with a header→merge + // edge so reachability stays accurate. + if prevCaseEnd != "" { + b.cfGraph.AddEdge(prevCaseEnd, mergeBlockID) + } + b.cfGraph.AddEdge(headerBlockID, mergeBlockID) + + return mergeBlockID +} + +// processDoWhile handles C/C++ do-while loops. +// +// Unlike `while`, the body always executes at least once because the +// condition is evaluated at the end. The CFG mirrors that: the +// predecessor flows directly into the body (no header gate), the body +// flows into the condition, and the condition either loops back to the +// body or falls through to the after block. +// +// [pred] ──> [body] ──> [cond] ──true──> [body] +// [cond] ──false─> [after] +func (b *cfgBuilder) processDoWhile(doNode, stmtNode *sitter.Node, predBlockID string) string { + bodyBlockID := b.newBlockID("do_body") + b.addBlock(bodyBlockID, BlockTypeNormal) + b.cfGraph.AddEdge(predBlockID, bodyBlockID) + + bodyNode := doNode.ChildByFieldName("body") + bodyEndID := bodyBlockID + if bodyNode != nil { + bodyEndID = b.processBody(bodyNode, bodyBlockID) + } + + condBlockID := b.newBlockID("do_cond") + b.addBlock(condBlockID, BlockTypeLoop) + if bodyEndID != "" { + b.cfGraph.AddEdge(bodyEndID, condBlockID) + } + if condNode := doNode.ChildByFieldName("condition"); condNode != nil { + b.appendStmt(condBlockID, &core.Statement{ + Type: core.StatementTypeWhile, + LineNumber: stmtNode.StartPoint().Row + 1, + Uses: extractIdentifiers(condNode, b.sourceCode), + }) + } + + // True edge loops back to the body; false edge falls through. + b.cfGraph.AddEdge(condBlockID, bodyBlockID) + + afterBlockID := b.newBlockID("do_after") + b.addBlock(afterBlockID, BlockTypeNormal) + b.cfGraph.AddEdge(condBlockID, afterBlockID) + return afterBlockID +} + // processWith handles with-statements. // Creates a block with the context variable def, then processes the body. func (b *cfgBuilder) processWith(withNode, stmtNode *sitter.Node, predBlockID string) string { diff --git a/sast-engine/graph/callgraph/cfg/builder_clike_test.go b/sast-engine/graph/callgraph/cfg/builder_clike_test.go new file mode 100644 index 00000000..004f8161 --- /dev/null +++ b/sast-engine/graph/callgraph/cfg/builder_clike_test.go @@ -0,0 +1,183 @@ +package cfg + +import ( + "context" + "testing" + + sitter "github.com/smacker/go-tree-sitter" + clang "github.com/smacker/go-tree-sitter/c" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// parseCFunction parses C source and returns the function_definition node. +// Uses the C grammar (switch/do-while node types are identical in C++, +// so a single grammar is enough for these tests). +func parseCFunction(t *testing.T, source string) *sitter.Node { + t.Helper() + parser := sitter.NewParser() + parser.SetLanguage(clang.GetLanguage()) + defer parser.Close() + + tree, err := parser.ParseCtx(context.Background(), nil, []byte(source)) + require.NoError(t, err) + root := tree.RootNode() + for i := 0; i < int(root.ChildCount()); i++ { + if c := root.Child(i); c != nil && c.Type() == "function_definition" { + return c + } + } + t.Fatal("no function_definition found in C source") + return nil +} + +// blockTypes returns every distinct BlockType present in the CFG. +// Useful for asserting on structural shape without hard-coding block IDs. +func blockTypes(cfg *ControlFlowGraph) map[BlockType]int { + out := make(map[BlockType]int) + for _, blk := range cfg.Blocks { + out[blk.Type]++ + } + return out +} + +// TestBuildCFG_C_Switch_BasicShape verifies a simple switch with +// three cases produces one BlockTypeSwitch header, one block per case, +// and a merge block reachable from every case. +func TestBuildCFG_C_Switch_BasicShape(t *testing.T) { + src := `void f(int x) { + switch (x) { + case 1: a(); break; + case 2: b(); break; + default: c(); break; + } +}` + fn := parseCFunction(t, src) + cfg, _, err := BuildCFGFromAST("test::f", fn, []byte(src)) + require.NoError(t, err) + require.NotNil(t, cfg) + + types := blockTypes(cfg) + assert.Equal(t, 1, types[BlockTypeSwitch], "expected exactly one switch header") + assert.NotZero(t, types[BlockTypeNormal], "expected at least one normal block") + + // Every block type table should let us reach exit. + paths := cfg.GetAllPaths() + assert.NotEmpty(t, paths, "switch graph must have a path to exit") +} + +// TestBuildCFG_C_Switch_FallthroughEdge verifies that consecutive +// cases without an explicit edge gating still leave the previous case +// connected to the next — necessary for fallthrough semantics. +func TestBuildCFG_C_Switch_FallthroughEdge(t *testing.T) { + src := `void f(int x) { + switch (x) { + case 1: a(); /* no break */ + case 2: b(); break; + } +}` + fn := parseCFunction(t, src) + cfg, _, err := BuildCFGFromAST("test::f", fn, []byte(src)) + require.NoError(t, err) + + // Find the switch header and confirm it has fan-out edges. + var switchID string + for id, blk := range cfg.Blocks { + if blk.Type == BlockTypeSwitch { + switchID = id + break + } + } + require.NotEmpty(t, switchID, "expected a switch header block") + header := cfg.Blocks[switchID] + // Header should fan out to: each case + the merge block. + assert.GreaterOrEqual(t, len(header.Successors), 2) +} + +// TestBuildCFG_C_Switch_EmptyBody guards the empty-switch corner: the +// header must still connect to a merge block so reachability holds. +func TestBuildCFG_C_Switch_EmptyBody(t *testing.T) { + src := `void f(int x) { + switch (x) { } +}` + fn := parseCFunction(t, src) + cfg, _, err := BuildCFGFromAST("test::f", fn, []byte(src)) + require.NoError(t, err) + + types := blockTypes(cfg) + assert.Equal(t, 1, types[BlockTypeSwitch]) + paths := cfg.GetAllPaths() + assert.NotEmpty(t, paths, "empty switch must still reach exit") +} + +// TestBuildCFG_C_DoWhile verifies that do-while produces the +// `[body] -> [cond] -> [body]` loop shape and a falling-through +// after-block. +func TestBuildCFG_C_DoWhile(t *testing.T) { + src := `void f(int x) { + do { + consume(x); + } while (x > 0); +}` + fn := parseCFunction(t, src) + cfg, _, err := BuildCFGFromAST("test::f", fn, []byte(src)) + require.NoError(t, err) + + types := blockTypes(cfg) + assert.NotZero(t, types[BlockTypeLoop], "expected at least one loop header (do-cond)") + + // The loop header should have at least two successors (body and after). + var loopID string + for id, blk := range cfg.Blocks { + if blk.Type == BlockTypeLoop { + loopID = id + break + } + } + require.NotEmpty(t, loopID) + assert.GreaterOrEqual(t, len(cfg.Blocks[loopID].Successors), 2, "do-cond fan-out (body + after)") + + paths := cfg.GetAllPaths() + assert.NotEmpty(t, paths) +} + +// TestBuildCFG_C_DoWhile_BodyExecutesFirst verifies the entry block +// flows directly into the body block (no header gate before the first +// iteration), matching do-while's "always execute once" semantics. +func TestBuildCFG_C_DoWhile_BodyExecutesFirst(t *testing.T) { + src := `void f() { + do { a(); } while (1); +}` + fn := parseCFunction(t, src) + cfg, _, err := BuildCFGFromAST("test::f", fn, []byte(src)) + require.NoError(t, err) + + // The entry block's reachable successors should include a + // non-loop body block before the loop header. + entry := cfg.Blocks[cfg.EntryBlockID] + require.NotNil(t, entry) + + // Walk from entry until we find a loop block; everything reachable + // before it must be normal/conditional. + visited := map[string]bool{} + var seenLoop bool + var walk func(id string) + walk = func(id string) { + if visited[id] || id == cfg.ExitBlockID { + return + } + visited[id] = true + blk := cfg.Blocks[id] + if blk == nil { + return + } + if blk.Type == BlockTypeLoop { + seenLoop = true + } + for _, s := range blk.Successors { + walk(s) + } + } + walk(cfg.EntryBlockID) + assert.True(t, seenLoop, "do-while should include a loop block reachable from entry") +} diff --git a/sast-engine/output/enricher.go b/sast-engine/output/enricher.go index 75268f3d..4a78ca40 100644 --- a/sast-engine/output/enricher.go +++ b/sast-engine/output/enricher.go @@ -151,6 +151,34 @@ func (e *Enricher) fallbackLocation(detection dsl.DataflowDetection) dsl.Locatio } } + // C/C++ FQN format: "::" or + // "::::::". The first `::`-segment + // is the project-relative source path; everything after is the + // scope chain. Resolve the file path against the project root and + // extract the function name from the trailing component. + if strings.Contains(detection.FunctionFQN, "::") { + segments := strings.SplitN(detection.FunctionFQN, "::", 2) + relFile := segments[0] + if relFile != "" { + candidate := relFile + if e.options.ProjectRoot != "" { + candidate = filepath.Join(e.options.ProjectRoot, relFile) + } + if _, err := os.Stat(candidate); err == nil { + loc.FilePath = candidate + loc.RelPath = relFile + if len(segments) > 1 { + tail := strings.Split(segments[1], "::") + loc.Function = tail[len(tail)-1] + if len(tail) >= 2 { + loc.ClassName = tail[len(tail)-2] + } + } + return loc + } + } + } + // Try to extract file path from FQN // Format: module.submodule.function or package.Class.method parts := strings.Split(detection.FunctionFQN, ".") @@ -183,8 +211,21 @@ func (e *Enricher) fallbackLocation(detection dsl.DataflowDetection) dsl.Locatio return loc } -// extractFunctionFromFQN extracts function name from fully qualified name. +// extractFunctionFromFQN extracts the bare function name from a fully +// qualified name. Two FQN shapes are supported: +// +// - Dot-separated (Python, Go, Java): "pkg.Mod.func" → "func" +// - C/C++ scope-resolved: "src/main.c::main", +// "src/utils.cpp::ns::Class::method" → "main", "method" +// +// The C/C++ form is detected by the presence of "::" — C/C++ FQNs +// always contain at least one because the prefix is itself a path +// segment joined to the symbol with "::". func extractFunctionFromFQN(fqn string) string { + if strings.Contains(fqn, "::") { + parts := strings.Split(fqn, "::") + return parts[len(parts)-1] + } parts := strings.Split(fqn, ".") if len(parts) > 0 { return parts[len(parts)-1] diff --git a/sast-engine/output/enricher_test.go b/sast-engine/output/enricher_test.go index d51656ab..4aaa03ea 100644 --- a/sast-engine/output/enricher_test.go +++ b/sast-engine/output/enricher_test.go @@ -62,11 +62,19 @@ func TestExtractFunctionFromFQN(t *testing.T) { fqn string expected string }{ + // Dot-separated (Python, Go, Java). {"myapp.auth.login", "login"}, {"package.Class.method", "method"}, {"singlename", "singlename"}, {"", ""}, {"a.b.c.d.e.f.g", "g"}, + {"github.com/shivasurya/app/handlers.Handle", "Handle"}, + // C/C++ scope-resolved FQNs. + {"src/main.c::main", "main"}, + {"src/buffer.c::create_buffer", "create_buffer"}, + {"src/utils.cpp::mylib::process", "process"}, + {"src/socket.cpp::mylib::Socket::connect", "connect"}, + {"src/app.cpp::App::run", "run"}, } for _, tt := range tests { @@ -515,6 +523,75 @@ func TestFallbackLocationFQNToJavaFileResolution(t *testing.T) { assert.Equal(t, filepath.Join("com", "example", "Main.java"), loc.RelPath) } +// TestFallbackLocationCFQN verifies that a C scope-resolved FQN like +// "src/main.c::main" is split correctly: the file is resolved against +// the project root, the function name is taken from the trailing +// segment, and there is no class component. +func TestFallbackLocationCFQN(t *testing.T) { + tmpDir := t.TempDir() + srcDir := filepath.Join(tmpDir, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + mainC := filepath.Join(srcDir, "main.c") + require.NoError(t, os.WriteFile(mainC, []byte("int main(){}"), 0o644)) + + e := NewEnricher(nil, &OutputOptions{ProjectRoot: tmpDir}) + + detection := dsl.DataflowDetection{ + FunctionFQN: "src/main.c::main", + SinkLine: 1, + } + loc := e.fallbackLocation(detection) + + assert.Equal(t, mainC, loc.FilePath) + assert.Equal(t, "src/main.c", loc.RelPath) + assert.Equal(t, "main", loc.Function) + assert.Empty(t, loc.ClassName, "C FQN has no class component") +} + +// TestFallbackLocationCppFQN verifies that a C++ scope-resolved FQN +// like "src/socket.cpp::ns::Socket::connect" extracts the file path, +// the trailing function name (`connect`), and the immediate class +// component (`Socket`). +func TestFallbackLocationCppFQN(t *testing.T) { + tmpDir := t.TempDir() + srcDir := filepath.Join(tmpDir, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + socketCpp := filepath.Join(srcDir, "socket.cpp") + require.NoError(t, os.WriteFile(socketCpp, []byte("// stub"), 0o644)) + + e := NewEnricher(nil, &OutputOptions{ProjectRoot: tmpDir}) + + detection := dsl.DataflowDetection{ + FunctionFQN: "src/socket.cpp::mylib::Socket::connect", + SinkLine: 7, + } + loc := e.fallbackLocation(detection) + + assert.Equal(t, socketCpp, loc.FilePath) + assert.Equal(t, "src/socket.cpp", loc.RelPath) + assert.Equal(t, "connect", loc.Function) + assert.Equal(t, "Socket", loc.ClassName) +} + +// TestFallbackLocationCFQN_MissingFile guards the case where the FQN +// is well-formed but the resolved file doesn't exist on disk: the +// fallback drops through to dot-based resolution rather than returning +// a phantom path. +func TestFallbackLocationCFQN_MissingFile(t *testing.T) { + tmpDir := t.TempDir() + e := NewEnricher(nil, &OutputOptions{ProjectRoot: tmpDir}) + + detection := dsl.DataflowDetection{ + FunctionFQN: "src/missing.c::ghost", + SinkLine: 3, + } + loc := e.fallbackLocation(detection) + + assert.Empty(t, loc.FilePath, "must not invent a file path") + // Function name still parsed from the FQN tail via the dot-split fallback. + assert.NotEmpty(t, loc.Function) +} + func TestFallbackLocationUnresolvableFQN(t *testing.T) { tmpDir := t.TempDir() e := NewEnricher(nil, &OutputOptions{ProjectRoot: tmpDir})