diff --git a/sast-engine/graph/callgraph/core/c_module_types.go b/sast-engine/graph/callgraph/core/c_module_types.go
new file mode 100644
index 00000000..0c2e1675
--- /dev/null
+++ b/sast-engine/graph/callgraph/core/c_module_types.go
@@ -0,0 +1,118 @@
+package core
+
+// CModuleRegistry indexes C source files for call-graph construction.
+//
+// C has no module system: a translation unit's identity IS its file path.
+// The registry therefore turns the project-relative file path into the
+// "module prefix" used to compose fully-qualified names (FQNs), and
+// records every function definition under that prefix so the call-graph
+// builder can resolve cross-file references.
+//
+// Lifecycle: the registry is built once after parsing (see
+// `registry.BuildCModuleRegistry`) and consumed read-only by the call-graph
+// builder. It is not safe for concurrent mutation; callers should treat it
+// as immutable after construction.
+//
+// FQN format:
+//
+//	"<project-relative file path>::<function name>"
+//
+//	"src/net/socket.c::connect_to_server"
+//	"include/buffer.h::create_buffer"
+type CModuleRegistry struct {
+	// FileToPrefix maps absolute file path to its project-relative prefix.
+	// The prefix is the path used in every FQN derived from the file:
+	//
+	//	"/home/dev/proj/src/net/socket.c" -> "src/net/socket.c"
+	//
+	// Files outside the project root are intentionally absent (the build
+	// step skips them) so consumers can rely on prefixes being relative.
+	FileToPrefix map[string]string
+
+	// Includes maps a project-relative file path to the relative paths of
+	// the headers it includes via `#include "..."`. System includes
+	// (`#include <...>`) are excluded — they have no project-local file.
+	//
+	//	"src/net/socket.c" -> ["include/buffer.h", "include/net.h"]
+	Includes map[string][]string
+
+	// FunctionIndex maps a bare function name to every FQN that defines
+	// that name. A name may resolve to multiple FQNs when it is declared
+	// in a header and defined in a .c file, or when distinct translation
+	// units declare static helpers with the same name.
+	//
+	//	"create_buffer" -> [
+	//	    "src/utils/buffer.c::create_buffer",
+	//	    "include/buffer.h::create_buffer",
+	//	]
+	FunctionIndex map[string][]string
+
+	// ProjectRoot is the absolute path used as the base for relative-path
+	// computation. Stored so consumers can re-derive prefixes for ad-hoc
+	// files (e.g. an include resolved at query time).
+	ProjectRoot string
+}
+
+// NewCModuleRegistry returns an empty CModuleRegistry rooted at projectRoot.
+// All maps are pre-allocated so callers may write directly without nil
+// checks.
+func NewCModuleRegistry(projectRoot string) *CModuleRegistry {
+	return &CModuleRegistry{
+		FileToPrefix:  make(map[string]string),
+		Includes:      make(map[string][]string),
+		FunctionIndex: make(map[string][]string),
+		ProjectRoot:   projectRoot,
+	}
+}
+
+// CppModuleRegistry extends CModuleRegistry with namespace and class
+// indices required for C++ resolution. C++ FQNs include the namespace
+// path and, for methods, the enclosing class:
+//
+//	"src/net/socket.cpp::mylib::Socket::connect" // namespace + class + method
+//	"src/main.cpp::main"                         // free function, no namespace
+//	"src/app.cpp::App::run"                      // class method, no namespace
+//
+// CppModuleRegistry embeds CModuleRegistry so all C-level lookups
+// (FileToPrefix, Includes, FunctionIndex) work uniformly across both
+// languages.
+type CppModuleRegistry struct {
+	// CModuleRegistry provides the file-to-prefix, include, and function
+	// indices shared with C. Free functions appear in FunctionIndex; the
+	// namespace- and class-qualified forms below complement (not replace)
+	// it.
+	CModuleRegistry
+
+	// NamespaceIndex maps a namespace-qualified key to its single
+	// canonical FQN. Keys take one of three forms:
+	//
+	//	"mylib::process"         // namespaced free function
+	//	"Socket::connect"        // class method, no namespace
+	//	"mylib::Socket::connect" // namespaced class method
+	//
+	// The map deliberately holds one FQN per key (the most recent
+	// definition wins) — overload resolution happens later in the call
+	// graph builder once parameter types are available.
+	NamespaceIndex map[string]string
+
+	// ClassIndex maps a bare class name to every FQN that declares the
+	// class. Multiple FQNs are expected for forward declarations or for
+	// classes declared in distinct namespaces sharing a name:
+	//
+	//	"Socket" -> [
+	//	    "src/net/socket.cpp::mylib::Socket",
+	//	    "include/socket.hpp::mylib::Socket",
+	//	]
+	ClassIndex map[string][]string
+}
+
+// NewCppModuleRegistry returns an empty CppModuleRegistry rooted at
+// projectRoot. The embedded CModuleRegistry is initialised with the same
+// root, so all maps are non-nil.
+func NewCppModuleRegistry(projectRoot string) *CppModuleRegistry {
+	return &CppModuleRegistry{
+		CModuleRegistry: *NewCModuleRegistry(projectRoot),
+		NamespaceIndex:  make(map[string]string),
+		ClassIndex:      make(map[string][]string),
+	}
+}
diff --git a/sast-engine/graph/callgraph/core/c_module_types_test.go b/sast-engine/graph/callgraph/core/c_module_types_test.go
new file mode 100644
index 00000000..2ff1b8ae
--- /dev/null
+++ b/sast-engine/graph/callgraph/core/c_module_types_test.go
@@ -0,0 +1,52 @@
+package core_test
+
+import (
+	"testing"
+
+	"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNewCModuleRegistry_AllocatesMaps(t *testing.T) {
+	root := "/projects/myapp"
+	reg := core.NewCModuleRegistry(root)
+
+	assert.NotNil(t, reg, "registry must be non-nil")
+	assert.Equal(t, root, reg.ProjectRoot, "ProjectRoot must round-trip")
+	assert.NotNil(t, reg.FileToPrefix, "FileToPrefix must be allocated")
+	assert.NotNil(t, reg.Includes, "Includes must be allocated")
+	assert.NotNil(t, reg.FunctionIndex, "FunctionIndex must be allocated")
+
+	// Maps must be writable without nil panics.
+	reg.FileToPrefix["/abs/foo.c"] = "foo.c"
+	reg.Includes["foo.c"] = []string{"bar.h"}
+	reg.FunctionIndex["foo"] = []string{"foo.c::foo"}
+	assert.Len(t, reg.FileToPrefix, 1)
+	assert.Len(t, reg.Includes, 1)
+	assert.Len(t, reg.FunctionIndex, 1)
+}
+
+func TestNewCppModuleRegistry_AllocatesMaps(t *testing.T) {
+	root := "/projects/cppapp"
+	reg := core.NewCppModuleRegistry(root)
+
+	assert.NotNil(t, reg)
+	assert.Equal(t, root, reg.ProjectRoot, "embedded ProjectRoot must round-trip")
+
+	// Embedded C registry maps.
+	assert.NotNil(t, reg.FileToPrefix)
+	assert.NotNil(t, reg.Includes)
+	assert.NotNil(t, reg.FunctionIndex)
+
+	// C++-specific maps.
+	assert.NotNil(t, reg.NamespaceIndex, "NamespaceIndex must be allocated")
+	assert.NotNil(t, reg.ClassIndex, "ClassIndex must be allocated")
+
+	// C++-specific maps and the promoted FunctionIndex are all writable via reg.
+	reg.NamespaceIndex["mylib::process"] = "src/utils.cpp::mylib::process"
+	reg.ClassIndex["Socket"] = []string{"src/net/socket.cpp::mylib::Socket"}
+	reg.FunctionIndex["main"] = []string{"src/main.cpp::main"}
+	assert.Len(t, reg.NamespaceIndex, 1)
+	assert.Len(t, reg.ClassIndex, 1)
+	assert.Len(t, reg.FunctionIndex, 1)
+}
diff --git a/sast-engine/graph/callgraph/registry/c_module.go b/sast-engine/graph/callgraph/registry/c_module.go
new file mode 100644
index 00000000..f1e39661
--- /dev/null
+++ b/sast-engine/graph/callgraph/registry/c_module.go
@@ -0,0 +1,457 @@
+package registry
+
+import (
+	"os"
+	"path/filepath"
+	"slices"
+	"strings"
+
+	"github.com/shivasurya/code-pathfinder/sast-engine/graph"
+	"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core"
+)
+
+// Node-type constants mirror the literals emitted by the C/C++ parsers in
+// `graph/parser_c.go` and `graph/parser_cpp.go`.
+// Keeping package-local copies documents the contract between the packages;
+// the stated reason is avoiding a `graph -> callgraph -> graph` import cycle.
+// NOTE(review): this file already imports `graph` — confirm that rationale.
+const (
+	cNodeFunctionDefinition = "function_definition"
+	cNodeMethodDeclaration  = "method_declaration"
+	cNodeClassDeclaration   = "class_declaration"
+	cNodeIncludeStatement   = "include_statement"
+)
+
+// Language tags emitted by the C/C++ parsers. These mirror the
+// `languageC` / `languageCpp` constants in the parser package.
+const (
+	languageC   = "c"
+	languageCpp = "cpp"
+)
+
+// metaSystemInclude is the metadata key set on `include_statement` nodes
+// to distinguish `#include <...>` from `#include "..."`. Defined in the
+// parser package; duplicated here to avoid an import cycle.
+const metaSystemInclude = "system_include"
+
+// fqnSeparator is the delimiter used between every FQN component for
+// C/C++. It matches C++'s native scope-resolution operator and is also
+// used for C even though C has no namespaces — keeping a single
+// separator simplifies cross-language consumers.
+const fqnSeparator = "::"
+
+// Conventional include search directories for `#include "..."`
+// resolution. Order matters: the first matching path wins.
+const (
+	includeDirInclude = "include"
+	includeDirSrc     = "src"
+)
+
+// BuildCModuleRegistry walks a CodeGraph and produces a CModuleRegistry
+// suitable for C call-graph construction.
+//
+// The function:
+//
+//  1. Records every distinct C source file in `FileToPrefix`, mapping
+//     absolute path to its project-relative form. Files outside
+//     projectPath (relative path beginning with `..`) are skipped — they
+//     cannot participate in project-local FQNs.
+//  2. Indexes every `function_definition` node under its bare name, so
+//     `FunctionIndex["create_buffer"]` lists every FQN defining that
+//     symbol.
+//  3. Resolves project-local `#include "..."` directives into a
+//     file -> [included files] map via BuildCIncludeMap.
+//
+// The registry is the read-only foundation used by the call-graph
+// builder (PR-07) to compute caller/callee FQNs and follow header-to-source
+// edges.
+//
+// Parameters:
+//   - projectPath: absolute path to the project root used as the FQN base.
+//   - codeGraph: parsed graph whose Nodes will be filtered by Language.
+//
+// Returns a fully-initialised, non-nil registry. A nil or empty graph
+// yields a registry with empty (but allocated) maps.
+func BuildCModuleRegistry(projectPath string, codeGraph *graph.CodeGraph) *core.CModuleRegistry {
+	registry := core.NewCModuleRegistry(projectPath)
+	if codeGraph == nil {
+		return registry
+	}
+
+	indexFilesAndFunctions(codeGraph, projectPath, languageC, registry, nil)
+	registry.Includes = BuildCIncludeMap(projectPath, codeGraph, languageC)
+	return registry
+}
+
+// BuildCppModuleRegistry walks a CodeGraph and produces a
+// CppModuleRegistry suitable for C++ call-graph construction.
+//
+// In addition to everything BuildCModuleRegistry does for C, this:
+//
+//  1. Builds a class lookup table from `class_declaration` nodes so
+//     methods can be qualified with their enclosing class name.
+//  2. Indexes free functions whose `PackageName` carries a namespace
+//     under `NamespaceIndex["ns::funcname"]`.
+//  3. Indexes class methods (whether parsed as `method_declaration` or
+//     `function_definition` whose body is inside a class) under
+//     `NamespaceIndex["[ns::]Class::method"]` and records the class
+//     itself in `ClassIndex`.
+//
+// Method-to-class association uses byte-range containment within the
+// same file: a method is associated with the innermost class whose
+// half-open `[StartByte, EndByte)` range contains the method's start byte.
+// This keeps the registry independent of parser-internal context tracking.
+func BuildCppModuleRegistry(projectPath string, codeGraph *graph.CodeGraph) *core.CppModuleRegistry {
+	registry := core.NewCppModuleRegistry(projectPath)
+	if codeGraph == nil {
+		return registry
+	}
+
+	classes := collectCppClasses(codeGraph, projectPath, registry)
+	indexFilesAndFunctions(codeGraph, projectPath, languageCpp, &registry.CModuleRegistry, func(node *graph.Node, prefix string) {
+		switch node.Type {
+		case cNodeFunctionDefinition:
+			indexCppFreeFunction(node, classes, prefix, registry)
+		case cNodeMethodDeclaration:
+			indexCppMethod(node, classes, prefix, registry)
+		}
+	})
+
+	registry.Includes = BuildCIncludeMap(projectPath, codeGraph, languageCpp)
+	return registry
+}
+
+// BuildCIncludeMap resolves project-local `#include "..."` directives in
+// a CodeGraph into a relative-path-keyed map.
+//
+// For every `include_statement` node whose Language matches the language
+// argument and whose `system_include` metadata is not true (i.e. quoted
+// includes), the function searches a fixed set of project directories
+// for the named header. When found, both the source and resolved file
+// are stored as project-relative paths in the result.
+//
+// Search order (first match wins):
+//
+//  1. The directory containing the source file.
+//  2. `<projectRoot>/include/<header>`
+//  3. `<projectRoot>/src/<header>`
+//  4. `<projectRoot>/<header>`
+//
+// System includes (`#include <...>`) are intentionally skipped — they
+// are resolved later by a stdlib registry. Headers that cannot be
+// located are silently dropped: a missing file is recorded by the
+// parser as an include statement but contributes nothing to call-graph
+// construction.
+//
+// The function never returns nil; an empty map signals "no resolvable
+// project-local includes" rather than "registry not built".
+func BuildCIncludeMap(projectPath string, codeGraph *graph.CodeGraph, language string) map[string][]string {
+	includes := make(map[string][]string)
+	if codeGraph == nil {
+		return includes
+	}
+
+	for _, node := range codeGraph.Nodes {
+		if !isProjectInclude(node, language) {
+			continue
+		}
+		resolved := resolveLocalInclude(projectPath, node.File, node.Name)
+		if resolved == "" {
+			continue
+		}
+		relSource, ok := relativeProjectPath(projectPath, node.File)
+		if !ok {
+			continue
+		}
+		relResolved, ok := relativeProjectPath(projectPath, resolved)
+		if !ok {
+			continue
+		}
+		includes[relSource] = appendUnique(includes[relSource], relResolved)
+	}
+	return includes
+}
+
+// indexFilesAndFunctions populates FileToPrefix and FunctionIndex on the
+// supplied CModuleRegistry. The optional onFunction callback fires for
+// every named function-like node (free function or method) with the file's
+// project-relative prefix — C++ uses it to compose namespace- and
+// class-qualified FQNs without re-walking the graph.
+func indexFilesAndFunctions(
+	codeGraph *graph.CodeGraph,
+	projectPath, language string,
+	registry *core.CModuleRegistry,
+	onFunction func(node *graph.Node, prefix string),
+) {
+	for _, node := range codeGraph.Nodes {
+		if node == nil || node.Language != language || node.File == "" {
+			continue
+		}
+		prefix, ok := ensureFilePrefix(registry, node.File, projectPath)
+		if !ok {
+			continue
+		}
+		if !isFunctionLikeNode(node) || node.Name == "" {
+			continue
+		}
+
+		// Free functions go straight into FunctionIndex under the bare
+		// "prefix::name" form. C++ methods (Type=="method_declaration")
+		// are NOT recorded here because they are reachable only via
+		// NamespaceIndex; mixing them in would mask overload resolution
+		// downstream.
+		if node.Type == cNodeFunctionDefinition {
+			fqn := joinFQN(prefix, node.Name)
+			registry.FunctionIndex[node.Name] = appendUnique(registry.FunctionIndex[node.Name], fqn)
+		}
+		if onFunction != nil {
+			onFunction(node, prefix)
+		}
+	}
+}
+
+// ensureFilePrefix records node.File in registry.FileToPrefix on first
+// sight and returns the resulting prefix. Files that fall outside
+// projectPath (relative path begins with `..`) are skipped: their
+// ("", false) return tells the caller to drop the node. Already-seen
+// files return their cached prefix.
+func ensureFilePrefix(registry *core.CModuleRegistry, file, projectPath string) (string, bool) {
+	if prefix, seen := registry.FileToPrefix[file]; seen {
+		return prefix, true
+	}
+	rel, ok := relativeProjectPath(projectPath, file)
+	if !ok {
+		return "", false
+	}
+	registry.FileToPrefix[file] = rel
+	return rel, true
+}
+
+// isFunctionLikeNode is true for graph node types that contribute a
+// function-shaped entry to the registry. Free functions and method
+// declarations are both function-like; class declarations are not.
+func isFunctionLikeNode(node *graph.Node) bool {
+	switch node.Type {
+	case cNodeFunctionDefinition, cNodeMethodDeclaration:
+		return true
+	}
+	return false
+}
+
+// isProjectInclude reports whether node is a quoted `#include "..."`
+// for the given language. System includes (`#include <...>`) and nodes
+// of other types are excluded.
+func isProjectInclude(node *graph.Node, language string) bool {
+	if node == nil || node.Language != language || node.Type != cNodeIncludeStatement {
+		return false
+	}
+	if node.Name == "" || node.File == "" {
+		return false
+	}
+	if v, ok := node.Metadata[metaSystemInclude].(bool); ok && v {
+		return false
+	}
+	return true
+}
+
+// resolveLocalInclude joins headerName onto each conventional search
+// directory and returns the first candidate that exists and is not a
+// directory, or "" when none match. Search order is documented on
+// BuildCIncludeMap.
+func resolveLocalInclude(projectRoot, sourceFile, headerName string) string {
+	if headerName == "" {
+		return ""
+	}
+	searchDirs := []string{
+		filepath.Dir(sourceFile),
+		filepath.Join(projectRoot, includeDirInclude),
+		filepath.Join(projectRoot, includeDirSrc),
+		projectRoot,
+	}
+	for _, dir := range searchDirs {
+		if dir == "" {
+			continue
+		}
+		candidate := filepath.Join(dir, headerName)
+		if info, err := os.Stat(candidate); err == nil && !info.IsDir() {
+			return candidate
+		}
+	}
+	return ""
+}
+
+// cppClassEntry caches the byte range of a single C++ class_declaration
+// node so methods declared inside it can be associated by containment.
+// Caching avoids quadratic re-scans of the graph during method indexing.
+type cppClassEntry struct {
+	name        string
+	packageName string
+	startByte   uint32
+	endByte     uint32
+	fqn         string
+}
+
+// collectCppClasses walks the graph for class_declaration nodes,
+// records each in registry.ClassIndex, and returns a per-file index
+// used to associate methods with their enclosing class.
+//
+// Anonymous classes (Name=="") and classes whose source location is
+// missing are skipped — neither can provide a meaningful FQN component.
+func collectCppClasses(
+	codeGraph *graph.CodeGraph,
+	projectPath string,
+	registry *core.CppModuleRegistry,
+) map[string][]cppClassEntry {
+	classes := make(map[string][]cppClassEntry)
+	for _, node := range codeGraph.Nodes {
+		if node == nil || node.Language != languageCpp || node.Type != cNodeClassDeclaration {
+			continue
+		}
+		if node.Name == "" || node.File == "" || node.SourceLocation == nil {
+			continue
+		}
+		prefix, ok := ensureFilePrefix(&registry.CModuleRegistry, node.File, projectPath)
+		if !ok {
+			continue
+		}
+		fqn := joinFQN(prefix, joinScope(node.PackageName, node.Name))
+		registry.ClassIndex[node.Name] = appendUnique(registry.ClassIndex[node.Name], fqn)
+		classes[node.File] = append(classes[node.File], cppClassEntry{
+			name:        node.Name,
+			packageName: node.PackageName,
+			startByte:   node.SourceLocation.StartByte,
+			endByte:     node.SourceLocation.EndByte,
+			fqn:         fqn,
+		})
+	}
+	return classes
+}
+
+// indexCppFreeFunction records a function_definition node in NamespaceIndex:
+// under "[ns::]Class::name" when its start byte lies inside a class's byte
+// range, otherwise under "ns::name" when it carries a namespace.
+// NOTE(review): out-of-line `Class::method` bodies lie outside that range — confirm their keying.
+//
+// The prefix argument is the file's project-relative path; the function
+// composes the qualified FQN as "prefix::ns::[Class::]name", omitting
+// empty scope components.
+func indexCppFreeFunction(
+	node *graph.Node,
+	classes map[string][]cppClassEntry,
+	prefix string,
+	registry *core.CppModuleRegistry,
+) {
+	if cls := enclosingClass(node, classes); cls != nil {
+		key := joinScope(cls.packageName, cls.name, node.Name)
+		fqn := joinFQN(prefix, key)
+		registry.NamespaceIndex[key] = fqn
+		return
+	}
+	if node.PackageName == "" {
+		return
+	}
+	key := joinScope(node.PackageName, node.Name)
+	registry.NamespaceIndex[key] = joinFQN(prefix, key)
+}
+
+// indexCppMethod records a method_declaration in NamespaceIndex under
+// its qualified key. Methods are always emitted inside a class context
+// (either inline-in-class or via the destructor handler), so we look up
+// the enclosing class by byte-range containment and fall back to
+// PackageName-only qualification if no class is found (defensive — this
+// should not happen for well-formed input; with no namespace it is dropped).
+func indexCppMethod(
+	node *graph.Node,
+	classes map[string][]cppClassEntry,
+	prefix string,
+	registry *core.CppModuleRegistry,
+) {
+	if cls := enclosingClass(node, classes); cls != nil {
+		key := joinScope(cls.packageName, cls.name, node.Name)
+		registry.NamespaceIndex[key] = joinFQN(prefix, key)
+		return
+	}
+	if node.PackageName == "" {
+		return
+	}
+	key := joinScope(node.PackageName, node.Name)
+	registry.NamespaceIndex[key] = joinFQN(prefix, key)
+}
+
+// enclosingClass returns the cppClassEntry whose half-open byte range
+// [startByte, endByte) contains node.SourceLocation.StartByte within the
+// same file, picking the innermost (smallest range) class when classes nest.
+// Returns nil when no class contains the node — used by free-function indexing.
+func enclosingClass(node *graph.Node, classes map[string][]cppClassEntry) *cppClassEntry {
+	if node == nil || node.SourceLocation == nil {
+		return nil
+	}
+	candidates := classes[node.File]
+	if len(candidates) == 0 {
+		return nil
+	}
+	pos := node.SourceLocation.StartByte
+	var best *cppClassEntry
+	for i := range candidates {
+		c := &candidates[i]
+		if pos < c.startByte || pos >= c.endByte {
+			continue
+		}
+		if best == nil || (c.endByte-c.startByte) < (best.endByte-best.startByte) {
+			best = c
+		}
+	}
+	return best
+}
+
+// joinFQN composes a top-level FQN from a file prefix and a tail scope.
+// The tail is expected to already be `::`-joined when it represents
+// nested scopes (e.g. "mylib::Socket::connect"). Both arguments are
+// non-empty in every call site within this package — see
+// indexFilesAndFunctions and indexCppMethod.
+func joinFQN(prefix, tail string) string {
+	return prefix + fqnSeparator + tail
+}
+
+// joinScope joins non-empty scope components with `::`. Empty entries
+// are dropped so callers can pass node.PackageName unconditionally
+// without worrying about double-separators when there is no namespace.
+func joinScope(parts ...string) string {
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		if p == "" {
+			continue
+		}
+		out = append(out, p)
+	}
+	return strings.Join(out, fqnSeparator)
+}
+
+// relativeProjectPath returns filepath.Rel(projectPath, file), converted to
+// forward slashes, plus a flag saying whether file lies strictly inside
+// projectPath. A Rel error, the root itself (`.`) and outside-project paths
+// (`..`, `../...`) all yield ("", false); callers should then drop the node.
+//
+// Path elements are compared by filepath.Rel: exactly on Unix-like systems
+// and case-insensitively on Windows. NOTE(review): an earlier comment also
+// listed macOS as case-insensitive; filepath.Rel compares exactly there.
+func relativeProjectPath(projectPath, file string) (string, bool) {
+	rel, err := filepath.Rel(projectPath, file)
+	if err != nil {
+		return "", false
+	}
+	if rel == "." || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return "", false
+	}
+	return filepath.ToSlash(rel), true
+}
+
+// appendUnique adds value to slice unless it is already present. O(n)
+// per call — acceptable for the small per-key slice sizes we expect
+// (most function names map to a single FQN).
+func appendUnique(slice []string, value string) []string {
+	if slices.Contains(slice, value) {
+		return slice
+	}
+	return append(slice, value)
+}
diff --git a/sast-engine/graph/callgraph/registry/c_module_test.go b/sast-engine/graph/callgraph/registry/c_module_test.go
new file mode 100644
index 00000000..3e20d351
--- /dev/null
+++ b/sast-engine/graph/callgraph/registry/c_module_test.go
@@ -0,0 +1,563 @@
+package registry_test
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/shivasurya/code-pathfinder/sast-engine/graph"
+	"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/registry"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// makeNode returns a pointer to a copy of n for hand-built test graphs, so
+// tests stay independent of the parser and tree-sitter. An empty ID defaults
+// to "Type:Name@File"; a nil SourceLocation to one carrying only the file.
+func makeNode(t *testing.T, n graph.Node) *graph.Node {
+	t.Helper()
+	if n.ID == "" {
+		n.ID = n.Type + ":" + n.Name + "@" + n.File
+	}
+	if n.SourceLocation == nil {
+		n.SourceLocation = &graph.SourceLocation{File: n.File}
+	}
+	return &n
+}
+
+// newGraphFromNodes returns a fresh CodeGraph with each given node added.
+func newGraphFromNodes(nodes ...*graph.Node) *graph.CodeGraph {
+	cg := graph.NewCodeGraph()
+	for _, n := range nodes {
+		cg.AddNode(n)
+	}
+	return cg
+}
+
+// TestBuildCModuleRegistry_FilesAndFunctions verifies that BuildCModuleRegistry
+// (a) maps every distinct C source file to its project-relative prefix and
+// (b) indexes every function_definition node under its bare name with the
+// correct "relpath::funcname" FQN format.
+func TestBuildCModuleRegistry_FilesAndFunctions(t *testing.T) {
+	root := "/projects/myapp"
+
+	main := makeNode(t, graph.Node{Type: "function_definition", Name: "main", Language: "c", File: root + "/src/main.c"})
+	helper := makeNode(t, graph.Node{Type: "function_definition", Name: "helper", Language: "c", File: root + "/src/main.c"})
+	createBuf := makeNode(t, graph.Node{Type: "function_definition", Name: "create_buffer", Language: "c", File: root + "/src/buffer.c"})
+	freeBuf := makeNode(t, graph.Node{Type: "function_definition", Name: "free_buffer", Language: "c", File: root + "/src/buffer.c"})
+	process := makeNode(t, graph.Node{Type: "function_definition", Name: "process", Language: "c", File: root + "/src/util.c"})
+
+	// Decoy nodes to confirm filtering: wrong language (python, cpp), empty name, empty file.
+	pyFunc := makeNode(t, graph.Node{Type: "function_definition", Name: "ignored", Language: "python", File: root + "/lib/x.py"})
+	cppFunc := makeNode(t, graph.Node{Type: "function_definition", Name: "ignored_cpp", Language: "cpp", File: root + "/src/x.cpp"})
+	emptyName := makeNode(t, graph.Node{Type: "function_definition", Name: "", Language: "c", File: root + "/src/empty.c"})
+	emptyFile := makeNode(t, graph.Node{Type: "function_definition", Name: "no_file", Language: "c", File: ""})
+
+	cg := newGraphFromNodes(main, helper, createBuf, freeBuf, process, pyFunc, cppFunc, emptyName, emptyFile)
+	reg := registry.BuildCModuleRegistry(root, cg)
+
+	require.NotNil(t, reg)
+
+	// emptyFile is dropped (no file), but emptyName still seeds FileToPrefix
+	// because its file path is valid — this matches the parser contract
+	// where an unnamed declaration can still anchor its translation unit.
+	assert.Equal(t, "src/main.c", reg.FileToPrefix[root+"/src/main.c"])
+	assert.Equal(t, "src/buffer.c", reg.FileToPrefix[root+"/src/buffer.c"])
+	assert.Equal(t, "src/util.c", reg.FileToPrefix[root+"/src/util.c"])
+	assert.Equal(t, "src/empty.c", reg.FileToPrefix[root+"/src/empty.c"])
+	assert.NotContains(t, reg.FileToPrefix, root+"/lib/x.py", "python files must not appear")
+	assert.NotContains(t, reg.FileToPrefix, root+"/src/x.cpp", "cpp files must not appear in C registry")
+
+	assert.ElementsMatch(t, []string{"src/main.c::main"}, reg.FunctionIndex["main"])
+	assert.ElementsMatch(t, []string{"src/main.c::helper"}, reg.FunctionIndex["helper"])
+	assert.ElementsMatch(t, []string{"src/buffer.c::create_buffer"}, reg.FunctionIndex["create_buffer"])
+	assert.ElementsMatch(t, []string{"src/buffer.c::free_buffer"}, reg.FunctionIndex["free_buffer"])
+	assert.ElementsMatch(t, []string{"src/util.c::process"}, reg.FunctionIndex["process"])
+	assert.NotContains(t, reg.FunctionIndex, "ignored")
+	assert.NotContains(t, reg.FunctionIndex, "ignored_cpp")
+}
+
+// TestBuildCModuleRegistry_DuplicateFunctionAcrossFiles verifies that a
+// function with the same name in both a header and a source file (both
+// modelled here as function_definition nodes) produces TWO FQN entries in
+// FunctionIndex — the registry deliberately preserves duplicates so the
+// call-graph builder can choose between header and source.
+func TestBuildCModuleRegistry_DuplicateFunctionAcrossFiles(t *testing.T) {
+	root := "/projects/myapp"
+	header := makeNode(t, graph.Node{Type: "function_definition", Name: "create_buffer", Language: "c", File: root + "/include/buffer.h"})
+	source := makeNode(t, graph.Node{Type: "function_definition", Name: "create_buffer", Language: "c", File: root + "/src/buffer.c"})
+
+	reg := registry.BuildCModuleRegistry(root, newGraphFromNodes(header, source))
+	assert.ElementsMatch(t,
+		[]string{"include/buffer.h::create_buffer", "src/buffer.c::create_buffer"},
+		reg.FunctionIndex["create_buffer"],
+	)
+}
+
+// TestBuildCModuleRegistry_DuplicateFunctionSameFile guards against the
+// most common bug in indexes of this shape: the same function visited
+// twice (two nodes with distinct explicit IDs) must not appear twice in FunctionIndex.
+func TestBuildCModuleRegistry_DuplicateFunctionSameFile(t *testing.T) {
+	root := "/projects/myapp"
+	a := makeNode(t, graph.Node{ID: "fdup-1", Type: "function_definition", Name: "init", Language: "c", File: root + "/src/init.c"})
+	b := makeNode(t, graph.Node{ID: "fdup-2", Type: "function_definition", Name: "init", Language: "c", File: root + "/src/init.c"})
+
+	reg := registry.BuildCModuleRegistry(root, newGraphFromNodes(a, b))
+	assert.ElementsMatch(t, []string{"src/init.c::init"}, reg.FunctionIndex["init"],
+		"same file + same name must dedupe to one FQN")
+}
+
+// TestBuildCModuleRegistry_OutsideProjectRoot verifies that files which
+// resolve to a `..`-prefixed relative path (i.e. outside the project)
+// are skipped entirely.
+func TestBuildCModuleRegistry_OutsideProjectRoot(t *testing.T) {
+	root := "/projects/myapp"
+	outside := makeNode(t, graph.Node{Type: "function_definition", Name: "external", Language: "c", File: "/projects/other/src/x.c"})
+	inside := makeNode(t, graph.Node{Type: "function_definition", Name: "main", Language: "c", File: root + "/src/main.c"})
+
+	reg := registry.BuildCModuleRegistry(root, newGraphFromNodes(outside, inside))
+	assert.NotContains(t, reg.FunctionIndex, "external")
+	assert.NotContains(t, reg.FileToPrefix, "/projects/other/src/x.c")
+	assert.Contains(t, reg.FunctionIndex, "main")
+}
+
+// TestBuildCModuleRegistry_EmptyGraph confirms an empty CodeGraph yields
+// a non-nil registry with empty (but allocated) maps.
+func TestBuildCModuleRegistry_EmptyGraph(t *testing.T) {
+	reg := registry.BuildCModuleRegistry("/projects/empty", graph.NewCodeGraph())
+	require.NotNil(t, reg)
+	assert.Empty(t, reg.FileToPrefix)
+	assert.Empty(t, reg.Includes)
+	assert.Empty(t, reg.FunctionIndex)
+	// nil-safety: a nil graph must not panic.
+	reg2 := registry.BuildCModuleRegistry("/projects/nil", nil)
+	require.NotNil(t, reg2)
+	assert.Empty(t, reg2.FileToPrefix)
+}
+
+// TestBuildCppModuleRegistry_NamespaceAndClassIndex verifies the C++
+// extension: free functions in a namespace land in NamespaceIndex,
+// classes land in ClassIndex, and methods inside a class are indexed by
+// "ns::Class::method".
+func TestBuildCppModuleRegistry_NamespaceAndClassIndex(t *testing.T) {
+	root := "/projects/cppapp"
+	srcCpp := root + "/src/socket.cpp"
+
+	// Class spans bytes [100, 400) in socket.cpp, namespace = mylib.
+	socketClass := makeNode(t, graph.Node{
+		Type:           "class_declaration",
+		Name:           "Socket",
+		Language:       "cpp",
+		File:           srcCpp,
+		PackageName:    "mylib",
+		SourceLocation: &graph.SourceLocation{File: srcCpp, StartByte: 100, EndByte: 400},
+	})
+	// Method "connect" inside the class (StartByte 150 ∈ [100,400)).
+	connect := makeNode(t, graph.Node{
+		Type:           "method_declaration",
+		Name:           "connect",
+		Language:       "cpp",
+		File:           srcCpp,
+		PackageName:    "mylib",
+		SourceLocation: &graph.SourceLocation{File: srcCpp, StartByte: 150, EndByte: 200},
+	})
+	// Free function "process" in namespace mylib, outside the class.
+	process := makeNode(t, graph.Node{
+		Type:           "function_definition",
+		Name:           "process",
+		Language:       "cpp",
+		File:           root + "/src/utils.cpp",
+		PackageName:    "mylib",
+		SourceLocation: &graph.SourceLocation{File: root + "/src/utils.cpp", StartByte: 0, EndByte: 50},
+	})
+	// Free function "main" with NO namespace and NO class — must NOT add
+	// double-colon prefixes (the regression covered by case #9).
+	main := makeNode(t, graph.Node{
+		Type:           "function_definition",
+		Name:           "main",
+		Language:       "cpp",
+		File:           root + "/src/main.cpp",
+		SourceLocation: &graph.SourceLocation{File: root + "/src/main.cpp", StartByte: 0, EndByte: 30},
+	})
+
+	reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(socketClass, connect, process, main))
+	require.NotNil(t, reg)
+
+	// FunctionIndex includes free functions only; methods do not appear here.
+	assert.ElementsMatch(t, []string{"src/utils.cpp::process"}, reg.FunctionIndex["process"])
+	assert.ElementsMatch(t, []string{"src/main.cpp::main"}, reg.FunctionIndex["main"])
+	assert.NotContains(t, reg.FunctionIndex, "connect", "methods must not appear in FunctionIndex")
+
+	// NamespaceIndex has one entry per qualified key.
+	assert.Equal(t, "src/utils.cpp::mylib::process", reg.NamespaceIndex["mylib::process"])
+	assert.Equal(t, "src/socket.cpp::mylib::Socket::connect", reg.NamespaceIndex["mylib::Socket::connect"])
+	assert.NotContains(t, reg.NamespaceIndex, "main", "free function with no namespace must not enter NamespaceIndex")
+
+	// ClassIndex maps bare class name to FQN(s).
+	assert.ElementsMatch(t, []string{"src/socket.cpp::mylib::Socket"}, reg.ClassIndex["Socket"])
+}
+
+// TestBuildCppModuleRegistry_ClassMethodWithoutNamespace covers the
+// "class method, no namespace" FQN form: the method key must be
+// "Class::method" and the FQN must NOT begin with a leading "::".
+func TestBuildCppModuleRegistry_ClassMethodWithoutNamespace(t *testing.T) {
+	root := "/projects/cppapp"
+	src := root + "/src/app.cpp"
+
+	appClass := makeNode(t, graph.Node{
+		Type:           "class_declaration",
+		Name:           "App",
+		Language:       "cpp",
+		File:           src,
+		PackageName:    "",
+		SourceLocation: &graph.SourceLocation{File: src, StartByte: 0, EndByte: 200},
+	})
+	run := makeNode(t, graph.Node{
+		Type:           "method_declaration",
+		Name:           "run",
+		Language:       "cpp",
+		File:           src,
+		PackageName:    "",
+		SourceLocation: &graph.SourceLocation{File: src, StartByte: 50, EndByte: 100},
+	})
+
+	reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(appClass, run))
+
+	assert.Equal(t, "src/app.cpp::App::run", reg.NamespaceIndex["App::run"])
+	assert.NotContains(t, reg.NamespaceIndex, "::App::run", "no leading separator")
+	assert.Equal(t, []string{"src/app.cpp::App"}, reg.ClassIndex["App"])
+}
+
+// TestBuildCIncludeMap_LocalAndSystemIncludes spans every important
+// include-resolution branch on a real on-disk layout:
+//   - A header in include/ resolved by directory #2.
+//   - A header in the same directory as the source resolved by #1
+//     (and shadowing an alternative match in include/).
+//   - A header in src/ (NOTE(review): main.c is in src/ too, so #1 finds it, not #3).
+//   - A header at the project root resolved by #4.
+//   - A system include skipped.
+//   - A missing header silently dropped.
+func TestBuildCIncludeMap_LocalAndSystemIncludes(t *testing.T) { + root := t.TempDir() + + // Layout: + // /src/main.c + // /src/local.h (same-dir resolution, shadows include/local.h) + // /include/local.h (would otherwise win for "local.h") + // /include/utils.h (resolved by #2) + // /src/extras.h (resolved by #3) + // /version.h (resolved by #4) + srcDir := filepath.Join(root, "src") + includeDir := filepath.Join(root, "include") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + require.NoError(t, os.MkdirAll(includeDir, 0o755)) + for _, p := range []string{ + filepath.Join(srcDir, "main.c"), + filepath.Join(srcDir, "local.h"), + filepath.Join(includeDir, "local.h"), + filepath.Join(includeDir, "utils.h"), + filepath.Join(srcDir, "extras.h"), + filepath.Join(root, "version.h"), + } { + require.NoError(t, os.WriteFile(p, []byte(""), 0o644)) + } + + mainC := filepath.Join(srcDir, "main.c") + cg := newGraphFromNodes( + makeNode(t, graph.Node{ + Type: "include_statement", Name: "local.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + }), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "utils.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + }), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "extras.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + }), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "version.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + }), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "stdio.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": true}, + }), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "missing.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + }), + ) + + includes := registry.BuildCIncludeMap(root, cg, "c") + assert.ElementsMatch(t, + 
[]string{"src/local.h", "include/utils.h", "src/extras.h", "version.h"}, + includes["src/main.c"], + ) + // stdio.h must not appear; missing.h must not appear. + for _, v := range includes["src/main.c"] { + assert.NotEqual(t, "stdio.h", v) + assert.NotEqual(t, "missing.h", v) + } +} + +// TestBuildCIncludeMap_AppearsInRegistry confirms BuildCModuleRegistry +// wires Includes through to the produced registry (not just the +// standalone helper). This is the integration that PR-07 will actually +// consume. +func TestBuildCIncludeMap_AppearsInRegistry(t *testing.T) { + root := t.TempDir() + srcDir := filepath.Join(root, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(srcDir, "buddy.h"), []byte(""), 0o644)) + + mainC := filepath.Join(srcDir, "main.c") + require.NoError(t, os.WriteFile(mainC, []byte(""), 0o644)) + + cg := newGraphFromNodes( + makeNode(t, graph.Node{Type: "function_definition", Name: "main", Language: "c", File: mainC}), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "buddy.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + }), + ) + + reg := registry.BuildCModuleRegistry(root, cg) + assert.ElementsMatch(t, []string{"src/buddy.h"}, reg.Includes["src/main.c"]) +} + +// TestBuildCIncludeMap_MissingMetadataTreatedAsLocal verifies behaviour +// when an include node lacks the `system_include` metadata entry: the +// function must default to project-local resolution rather than crash +// or skip silently. 
+func TestBuildCIncludeMap_MissingMetadataTreatedAsLocal(t *testing.T) { + root := t.TempDir() + srcDir := filepath.Join(root, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(srcDir, "neighbour.h"), []byte(""), 0o644)) + + mainC := filepath.Join(srcDir, "main.c") + require.NoError(t, os.WriteFile(mainC, []byte(""), 0o644)) + + cg := newGraphFromNodes(makeNode(t, graph.Node{ + Type: "include_statement", Name: "neighbour.h", + Language: "c", File: mainC, + // No Metadata at all. + })) + + includes := registry.BuildCIncludeMap(root, cg, "c") + assert.ElementsMatch(t, []string{"src/neighbour.h"}, includes["src/main.c"]) +} + +// TestBuildCIncludeMap_LanguageFilter confirms BuildCIncludeMap honours +// the language argument: a C++ include statement must NOT appear when +// the registry is built for C, and vice versa. +func TestBuildCIncludeMap_LanguageFilter(t *testing.T) { + root := t.TempDir() + srcDir := filepath.Join(root, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(srcDir, "shared.h"), []byte(""), 0o644)) + + cFile := filepath.Join(srcDir, "main.c") + cppFile := filepath.Join(srcDir, "main.cpp") + require.NoError(t, os.WriteFile(cFile, []byte(""), 0o644)) + require.NoError(t, os.WriteFile(cppFile, []byte(""), 0o644)) + + cg := newGraphFromNodes( + makeNode(t, graph.Node{ + Type: "include_statement", Name: "shared.h", + Language: "c", File: cFile, + Metadata: map[string]any{"system_include": false}, + }), + makeNode(t, graph.Node{ + Type: "include_statement", Name: "shared.h", + Language: "cpp", File: cppFile, + Metadata: map[string]any{"system_include": false}, + }), + ) + + cIncludes := registry.BuildCIncludeMap(root, cg, "c") + cppIncludes := registry.BuildCIncludeMap(root, cg, "cpp") + + assert.Contains(t, cIncludes, "src/main.c") + assert.NotContains(t, cIncludes, "src/main.cpp") + assert.Contains(t, cppIncludes, "src/main.cpp") + 
assert.NotContains(t, cppIncludes, "src/main.c") +} + +// TestBuildCppModuleRegistry_OutOfLineMethodBody covers a less-common +// shape: a function_definition whose byte range sits inside a class +// body. This is what tree-sitter produces for `void Foo::bar() {...}` +// defined at file scope but enclosed in a class block (e.g. inline +// header definitions). The registry must associate the function with +// its enclosing class and emit "Class::method" in NamespaceIndex. +func TestBuildCppModuleRegistry_OutOfLineMethodBody(t *testing.T) { + root := "/projects/cppapp" + src := root + "/include/widget.hpp" + + widgetClass := makeNode(t, graph.Node{ + Type: "class_declaration", + Name: "Widget", + Language: "cpp", + File: src, + PackageName: "ui", + SourceLocation: &graph.SourceLocation{File: src, StartByte: 0, EndByte: 500}, + }) + render := makeNode(t, graph.Node{ + Type: "function_definition", + Name: "render", + Language: "cpp", + File: src, + PackageName: "ui", + SourceLocation: &graph.SourceLocation{File: src, StartByte: 200, EndByte: 300}, + }) + + reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(widgetClass, render)) + assert.Equal(t, "include/widget.hpp::ui::Widget::render", reg.NamespaceIndex["ui::Widget::render"]) + // The function still appears in FunctionIndex (free-function form) + // because the registry preserves both views — call-graph resolution + // can pick whichever fits. + assert.ElementsMatch(t, []string{"include/widget.hpp::render"}, reg.FunctionIndex["render"]) +} + +// TestBuildCppModuleRegistry_FreeFunctionNoNamespaceNoClass verifies +// that a top-level C++ function with neither a namespace nor an +// enclosing class is recorded in FunctionIndex but NOT in +// NamespaceIndex (it has no qualified key to register under). 
+func TestBuildCppModuleRegistry_FreeFunctionNoNamespaceNoClass(t *testing.T) { + root := "/projects/cppapp" + src := root + "/src/standalone.cpp" + + helper := makeNode(t, graph.Node{ + Type: "function_definition", + Name: "helper", + Language: "cpp", + File: src, + SourceLocation: &graph.SourceLocation{File: src, StartByte: 10, EndByte: 50}, + }) + + reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(helper)) + assert.ElementsMatch(t, []string{"src/standalone.cpp::helper"}, reg.FunctionIndex["helper"]) + assert.Empty(t, reg.NamespaceIndex, "no namespace + no class => no NamespaceIndex entry") +} + +// TestBuildCppModuleRegistry_OrphanMethodWithNamespace covers the +// defensive fallback in indexCppMethod: a method_declaration that has a +// PackageName but no enclosing class node in the graph (this should not +// happen for well-formed input, but the registry must not drop it). +func TestBuildCppModuleRegistry_OrphanMethodWithNamespace(t *testing.T) { + root := "/projects/cppapp" + src := root + "/src/orphan.cpp" + + orphan := makeNode(t, graph.Node{ + Type: "method_declaration", + Name: "lonely", + Language: "cpp", + File: src, + PackageName: "ghost", + SourceLocation: &graph.SourceLocation{File: src, StartByte: 0, EndByte: 30}, + }) + + reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(orphan)) + assert.Equal(t, "src/orphan.cpp::ghost::lonely", reg.NamespaceIndex["ghost::lonely"]) +} + +// TestBuildCppModuleRegistry_OrphanMethodNoNamespace checks the other +// branch of the defensive fallback: a method with neither a class nor a +// PackageName must be skipped from NamespaceIndex entirely. 
+func TestBuildCppModuleRegistry_OrphanMethodNoNamespace(t *testing.T) { + root := "/projects/cppapp" + src := root + "/src/orphan.cpp" + + orphan := makeNode(t, graph.Node{ + Type: "method_declaration", + Name: "stray", + Language: "cpp", + File: src, + SourceLocation: &graph.SourceLocation{File: src, StartByte: 0, EndByte: 30}, + }) + + reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(orphan)) + assert.Empty(t, reg.NamespaceIndex) +} + +// TestBuildCIncludeMap_EmptyHeaderName guards against include nodes +// with an empty Name field. The parser should never emit one, but the +// resolver must not stat("") and must not pollute the map. +func TestBuildCIncludeMap_EmptyHeaderName(t *testing.T) { + root := t.TempDir() + srcDir := filepath.Join(root, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + mainC := filepath.Join(srcDir, "main.c") + require.NoError(t, os.WriteFile(mainC, []byte(""), 0o644)) + + cg := newGraphFromNodes(makeNode(t, graph.Node{ + Type: "include_statement", Name: "", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + })) + + includes := registry.BuildCIncludeMap(root, cg, "c") + assert.Empty(t, includes) +} + +// TestBuildCIncludeMap_HeaderIsDirectoryRejected confirms resolveLocalInclude +// rejects directory matches. A directory named the same as the header +// must not be returned as a resolved include. 
+func TestBuildCIncludeMap_HeaderIsDirectoryRejected(t *testing.T) { + root := t.TempDir() + srcDir := filepath.Join(root, "src") + require.NoError(t, os.MkdirAll(srcDir, 0o755)) + // A directory named "config.h" alongside main.c + require.NoError(t, os.MkdirAll(filepath.Join(srcDir, "config.h"), 0o755)) + + mainC := filepath.Join(srcDir, "main.c") + require.NoError(t, os.WriteFile(mainC, []byte(""), 0o644)) + + cg := newGraphFromNodes(makeNode(t, graph.Node{ + Type: "include_statement", Name: "config.h", + Language: "c", File: mainC, + Metadata: map[string]any{"system_include": false}, + })) + + includes := registry.BuildCIncludeMap(root, cg, "c") + assert.Empty(t, includes["src/main.c"], "a directory must not satisfy include resolution") +} + +// TestBuildCppModuleRegistry_ClassWithDuplicateName covers the case of a +// class declared in both a header and a source file: ClassIndex must +// list both FQNs (the call-graph builder will choose by usage site). +func TestBuildCppModuleRegistry_ClassWithDuplicateName(t *testing.T) { + root := "/projects/cppapp" + + headerSocket := makeNode(t, graph.Node{ + Type: "class_declaration", + Name: "Socket", + Language: "cpp", + File: root + "/include/socket.hpp", + PackageName: "mylib", + SourceLocation: &graph.SourceLocation{File: root + "/include/socket.hpp", StartByte: 0, EndByte: 100}, + }) + sourceSocket := makeNode(t, graph.Node{ + Type: "class_declaration", + Name: "Socket", + Language: "cpp", + File: root + "/src/socket.cpp", + PackageName: "mylib", + SourceLocation: &graph.SourceLocation{File: root + "/src/socket.cpp", StartByte: 0, EndByte: 200}, + }) + + reg := registry.BuildCppModuleRegistry(root, newGraphFromNodes(headerSocket, sourceSocket)) + assert.ElementsMatch(t, + []string{"include/socket.hpp::mylib::Socket", "src/socket.cpp::mylib::Socket"}, + reg.ClassIndex["Socket"], + ) +}