diff --git a/sast-engine/graph/callgraph/builder/cpp_builder.go b/sast-engine/graph/callgraph/builder/cpp_builder.go new file mode 100644 index 00000000..c779a31e --- /dev/null +++ b/sast-engine/graph/callgraph/builder/cpp_builder.go @@ -0,0 +1,577 @@ +package builder + +import ( + "strings" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution" +) + +// languageCpp is the Node.Language tag emitted by the C++ parser. +// Mirrors `parser_c.go:languageCpp`. +const languageCpp = "cpp" + +// nodeMethodDeclaration is the Node.Type emitted for inline method +// declarations and out-of-line method definitions inside a class body. +const nodeMethodDeclaration = "method_declaration" + +// nodeClassDeclaration is the Node.Type emitted for `class C { ... }` +// (and `parseCppStructSpecifier` for struct-as-class). +const nodeClassDeclaration = "class_declaration" + +// nodeFieldDeclaration is the Node.Type used for both data members and +// non-method field-shaped declarations inside a class body. +const nodeFieldDeclaration = "field_declaration" + +// receiverThis is the conventional name of the implicit class instance +// in C++ method bodies — `this->method()` resolution short-circuits to +// the caller's enclosing class. +const receiverThis = "this" + +// pointerPrefixes lists the pointer/reference qualifiers stripped from +// receiver type names before NamespaceIndex / ClassIndex lookup. The +// same type strings produced by the parser may be `Dog*`, `Dog&`, or +// `const Dog*` — every form must reduce to the bare `Dog`. +var pointerPrefixes = []string{"const ", "volatile "} + +// pointerSuffixes lists trailing modifiers stripped from receiver type +// names. Order matters: long forms first so `&&` does not become `&`. +var pointerSuffixes = []string{"**", "*", "&&", "&"} + +// BuildCppCallGraph constructs the C++ call graph using the same +// four-pass structure as the C builder, plus three C++-specific +// resolution paths exercised in Pass 4: +// +// 1. Namespace-/scope-qualified calls — `ns::func()`, +// `ClassName::staticMethod()` — resolve directly through +// `registry.NamespaceIndex`. +// 2. Method calls on typed receivers — `obj.method()` / +// `obj->method()` — look up the receiver's declared type via the +// type engine, then locate the method on that class. +// 3. `this->method()` — the receiver type is implicit (the caller's +// enclosing class), so resolution skips the type engine and asks +// the registry's class index directly. +// +// Plain free-function calls fall through to the same definition- +// preferring resolution used by the C builder +// (`resolveCCallTarget`), which makes the C++ builder a strict +// superset of the C one. +// +// The result is a stand-alone `*core.CallGraph` whose FQNs do not +// collide with C, Python, Go, or Java — `MergeCallGraphs` is safe to +// call with any combination of language graphs. +// +// Parameters: +// - codeGraph: parsed graph from graph.Initialize. Nil-safe. +// - registry: C++ module registry from PR-05. Nil-safe. +// - typeEngine: C++ type inference engine from PR-06. Nil-safe. +// +// Returns a fully-populated CallGraph and a nil error. Errors are +// reserved for future failure modes; the current implementation never +// returns one. +func BuildCppCallGraph( + codeGraph *graph.CodeGraph, + registry *core.CppModuleRegistry, + typeEngine *resolution.CppTypeInferenceEngine, +) (*core.CallGraph, error) { + callGraph := core.NewCallGraph() + if codeGraph == nil || registry == nil { + return callGraph, nil + } + + classes := collectCppClassesByFile(codeGraph) + indexCppFunctions(codeGraph, callGraph, registry, classes) + extractCppFunctionTypes(codeGraph, callGraph, typeEngine, classes) + callSites := extractCppCallSites(callGraph) + resolveCppCallSites(callSites, callGraph, registry, typeEngine, classes) + + return callGraph, nil +} + +// ============================================================================= +// Class-by-file index +// ============================================================================= + +// cppClassByteRange caches a class's source span for byte-range +// containment lookups. Methods declared inside a class body share its +// file and lie inside its [startByte, endByte) range; that is how we +// associate methods with their class without trusting parser-internal +// context tracking. +type cppClassByteRange struct { + name string + packageName string + startByte uint32 + endByte uint32 +} + +// collectCppClassesByFile groups every C++ class_declaration by the +// file that declares it. The result is keyed by absolute file path so +// per-file containment lookups stay O(C) where C is the (small) number +// of classes in that file. +func collectCppClassesByFile(codeGraph *graph.CodeGraph) map[string][]cppClassByteRange { + classes := make(map[string][]cppClassByteRange) + for _, node := range codeGraph.Nodes { + if !isCppClassNode(node) { + continue + } + classes[node.File] = append(classes[node.File], cppClassByteRange{ + name: node.Name, + packageName: node.PackageName, + startByte: node.SourceLocation.StartByte, + endByte: node.SourceLocation.EndByte, + }) + } + return classes +} + +// isCppClassNode is true when node is a usable C++ class declaration — +// well-named, file-anchored, and carrying a byte range. Anonymous +// classes (Name == "") are intentionally excluded; they cannot +// contribute to FQNs. +func isCppClassNode(node *graph.Node) bool { + return node != nil && + node.Language == languageCpp && + node.Type == nodeClassDeclaration && + node.Name != "" && + node.File != "" && + node.SourceLocation != nil +} + +// enclosingCppClass returns the smallest class byte-range in classes +// whose [startByte, endByte) span contains node's start byte, or nil +// when none does (free function / file-scope declaration). +// +// Picking the innermost class is a deliberate choice: nested classes +// (`class Outer { class Inner { ... }; };`) require the inner range to +// win so methods of `Inner` are not mis-attributed to `Outer`. +func enclosingCppClass(node *graph.Node, classes map[string][]cppClassByteRange) *cppClassByteRange { + if node == nil || node.SourceLocation == nil { + return nil + } + candidates := classes[node.File] + if len(candidates) == 0 { + return nil + } + pos := node.SourceLocation.StartByte + var best *cppClassByteRange + for i := range candidates { + c := &candidates[i] + if pos < c.startByte || pos >= c.endByte { + continue + } + if best == nil || (c.endByte-c.startByte) < (best.endByte-best.startByte) { + best = c + } + } + return best +} + +// ============================================================================= +// Pass 1 — index functions +// ============================================================================= + +// indexCppFunctions records every C++ function and method in +// callGraph.Functions under its qualified FQN. The FQN composition +// mirrors PR-05's BuildCppModuleRegistry so cross-component lookups +// stay consistent: +// +// free function (no namespace): "prefix::name" +// free function with namespace: "prefix::ns::name" +// class method: "prefix::[ns::]Class::name" +// +// In addition, the function ensures every recorded FQN appears in the +// registry's FunctionIndex / NamespaceIndex / ClassIndex tables so +// later passes can look up callees without re-deriving the FQN. +func indexCppFunctions( + codeGraph *graph.CodeGraph, + callGraph *core.CallGraph, + registry *core.CppModuleRegistry, + classes map[string][]cppClassByteRange, +) { + for _, node := range codeGraph.Nodes { + if !isCppFunctionNode(node) { + continue + } + prefix, ok := registry.FileToPrefix[node.File] + if !ok { + continue + } + cls := enclosingCppClass(node, classes) + fqn := composeCppFQN(prefix, node, cls) + callGraph.Functions[fqn] = node + + // Free functions stay reachable through the bare-name + // FunctionIndex so the C-style fallthrough in Pass 4 can find + // them with no namespace context. + if cls == nil && node.Type == "function_definition" { + appendUniqueFQN(registry.FunctionIndex, node.Name, fqn) + } + + key := composeCppScopeKey(node, cls) + if key != "" { + registry.NamespaceIndex[key] = fqn + } + } +} + +// isCppFunctionNode is true when node is a usable C++ function- or +// method-shaped node with a name and file. +func isCppFunctionNode(node *graph.Node) bool { + if node == nil || node.Language != languageCpp || node.Name == "" || node.File == "" { + return false + } + return node.Type == "function_definition" || node.Type == nodeMethodDeclaration +} + +// composeCppFQN composes the canonical C++ FQN for a function or method +// node, using its enclosing class (if any) for the class component. +func composeCppFQN(prefix string, node *graph.Node, cls *cppClassByteRange) string { + scope := composeCppScope(node, cls) + if scope == "" { + return prefix + fqnSeparator + node.Name + } + return prefix + fqnSeparator + scope + fqnSeparator + node.Name +} + +// composeCppScope returns the namespace + class chain that prefixes a +// function name in its FQN, joined by `::`. Empty when the function is +// a top-level free function with no namespace. +func composeCppScope(node *graph.Node, cls *cppClassByteRange) string { + switch { + case cls != nil: + return joinScopeParts(cls.packageName, cls.name) + case node.PackageName != "": + return node.PackageName + } + return "" +} + +// composeCppScopeKey returns the lookup key used by NamespaceIndex — +// `[ns::]Class::method` for methods, `ns::name` for namespaced free +// functions, "" when the function has no qualifying scope. +func composeCppScopeKey(node *graph.Node, cls *cppClassByteRange) string { + scope := composeCppScope(node, cls) + if scope == "" { + return "" + } + return scope + fqnSeparator + node.Name +} + +// joinScopeParts joins non-empty scope tokens with `::`. Used so +// callers can pass `node.PackageName` unconditionally without having +// to special-case the no-namespace path. +func joinScopeParts(parts ...string) string { + out := make([]string, 0, len(parts)) + for _, p := range parts { + if p == "" { + continue + } + out = append(out, p) + } + return strings.Join(out, fqnSeparator) +} + +// ============================================================================= +// Pass 2 — extract types +// ============================================================================= + +// extractCppFunctionTypes registers explicit return types, parameter +// symbols, class method return types, and class field types. +// +// Free-function and parameter handling mirrors the C builder so the +// resolver can resolve C++ free-function calls identically. Class +// method/field tracking augments the C++ type engine for receiver-typed +// resolution in Pass 4. +func extractCppFunctionTypes( + codeGraph *graph.CodeGraph, + callGraph *core.CallGraph, + typeEngine *resolution.CppTypeInferenceEngine, + classes map[string][]cppClassByteRange, +) { + for fqn, node := range callGraph.Functions { + if isDeclaration(node) { + // Declarations still register class methods on the type + // engine — that is the only place inline header method + // signatures appear — but they do not contribute return + // types or parameter symbols (no body). + registerCppClassMember(node, typeEngine, classes) + continue + } + if typeEngine != nil { + typeEngine.ExtractReturnType(fqn, node.ReturnType) + } + registerCParameters(callGraph, fqn, node) + registerCppClassMember(node, typeEngine, classes) + } + + if typeEngine != nil { + registerCppClassFields(codeGraph, typeEngine, classes) + } +} + +// registerCppClassMember records a method's return type on the type +// engine when the node is enclosed by a class. No-op for free +// functions and for `void` returns. +func registerCppClassMember( + node *graph.Node, + typeEngine *resolution.CppTypeInferenceEngine, + classes map[string][]cppClassByteRange, +) { + if typeEngine == nil || node == nil || node.ReturnType == "" { + return + } + cls := enclosingCppClass(node, classes) + if cls == nil { + return + } + typeEngine.RegisterClassMethod(cls.name, node.Name, node.ReturnType) +} + +// registerCppClassFields walks every field_declaration and records its +// declared type on the type engine, keyed by enclosing class name. +// Used in Pass 4 to resolve `obj.field.method()` chains; the field +// type tells the resolver which class to look up the method on. +func registerCppClassFields( + codeGraph *graph.CodeGraph, + typeEngine *resolution.CppTypeInferenceEngine, + classes map[string][]cppClassByteRange, +) { + for _, node := range codeGraph.Nodes { + if !isCppFieldNode(node) { + continue + } + cls := enclosingCppClass(node, classes) + if cls == nil { + continue + } + typeEngine.RegisterClassField(cls.name, node.Name, node.DataType) + } +} + +// isCppFieldNode is true when node is a usable C++ field declaration +// (data member with a type and name). Method declarations also use the +// field_declaration node type but carry no DataType, so the DataType +// check naturally filters them. +func isCppFieldNode(node *graph.Node) bool { + return node != nil && + node.Language == languageCpp && + node.Type == nodeFieldDeclaration && + node.Name != "" && + node.DataType != "" +} + +// ============================================================================= +// Pass 3 — extract call sites +// ============================================================================= + +// extractCppCallSites walks the parser-emitted edges from each C++ +// function/method to its call_expression children and emits one +// CallSiteInternal per call. The call shape (free, method, qualified) +// is preserved on the call node's metadata; this pass copies it +// forward so Pass 4 can dispatch without re-walking the AST. +func extractCppCallSites(callGraph *core.CallGraph) []*CallSiteInternal { + sites := make([]*CallSiteInternal, 0) + for callerFQN, fnNode := range callGraph.Functions { + if isDeclaration(fnNode) { + continue + } + for _, edge := range fnNode.OutgoingEdges { + callNode := edge.To + if !isCppCallNode(callNode) { + continue + } + sites = append(sites, &CallSiteInternal{ + CallerFQN: callerFQN, + CallerFile: fnNode.File, + CallLine: callNode.LineNumber, + FunctionName: callNode.Name, + ObjectName: stringMetadata(callNode, "receiver"), + Arguments: append([]string(nil), callNode.MethodArgumentsValue...), + }) + } + } + return sites +} + +// isCppCallNode is true when node represents a C++ call_expression +// with a usable target. +func isCppCallNode(node *graph.Node) bool { + return node != nil && + node.Language == languageCpp && + node.Type == "call_expression" && + node.Name != "" +} + +// ============================================================================= +// Pass 4 — resolve call sites +// ============================================================================= + +// resolveCppCallSites resolves every call site and adds the +// corresponding edge / CallSite record to the call graph. Unresolved +// sites are still recorded (Resolved=false) for diagnostics — stdlib +// and external calls remain visible. +func resolveCppCallSites( + sites []*CallSiteInternal, + callGraph *core.CallGraph, + registry *core.CppModuleRegistry, + typeEngine *resolution.CppTypeInferenceEngine, + classes map[string][]cppClassByteRange, +) { + for _, cs := range sites { + targetFQN, resolved := resolveCppCallTarget(cs, callGraph, registry, typeEngine, classes) + callSite := buildCCallSite(cs, targetFQN, resolved) + callGraph.AddCallSite(cs.CallerFQN, callSite) + if resolved { + callGraph.AddEdge(cs.CallerFQN, targetFQN) + } + } +} + +// resolveCppCallTarget implements the C++-specific resolution order: +// +// 1. Qualified call (`ns::func`, `Class::staticMethod`) — direct +// NamespaceIndex lookup with the full qualified name. +// 2. `this->method()` — receiver type is implicit; look up the +// method on the caller's enclosing class. +// 3. Method on typed receiver — find the receiver's declared type +// via the type engine, then look up the method on that class. +// 4. C-style fallthrough — definition-preferring lookup that +// mirrors `resolveCCallTarget`. +// +// Each step short-circuits on the first hit; later steps are tried +// only if earlier ones miss. +func resolveCppCallTarget( + cs *CallSiteInternal, + callGraph *core.CallGraph, + registry *core.CppModuleRegistry, + typeEngine *resolution.CppTypeInferenceEngine, + classes map[string][]cppClassByteRange, +) (string, bool) { + if cs.FunctionName == "" { + return "", false + } + + if fqn, ok := lookupQualifiedCall(cs.FunctionName, registry); ok { + return fqn, true + } + if cs.ObjectName == receiverThis { + if fqn, ok := lookupThisMethod(cs, callGraph, registry, classes); ok { + return fqn, true + } + } else if cs.ObjectName != "" { + if fqn, ok := lookupReceiverMethod(cs, registry, typeEngine); ok { + return fqn, true + } + } + return resolveCCallTarget(cs, callGraph, ®istry.CModuleRegistry) +} + +// lookupQualifiedCall handles `ns::func` and `Class::staticMethod` by +// querying NamespaceIndex with the verbatim call name. The check is +// scoped to names containing `::` so plain `func()` calls fall through +// to later stages without an extra map lookup. +func lookupQualifiedCall(name string, registry *core.CppModuleRegistry) (string, bool) { + if !strings.Contains(name, fqnSeparator) { + return "", false + } + fqn, ok := registry.NamespaceIndex[name] + return fqn, ok +} + +// lookupReceiverMethod resolves `obj.method()` / `obj->method()` by +// looking up the receiver's declared type in the type engine and then +// finding the method on that class via NamespaceIndex. +func lookupReceiverMethod( + cs *CallSiteInternal, + registry *core.CppModuleRegistry, + typeEngine *resolution.CppTypeInferenceEngine, +) (string, bool) { + if typeEngine == nil { + return "", false + } + scope := typeEngine.GetScope(cs.CallerFQN) + if scope == nil { + return "", false + } + binding := scope.GetVariable(cs.ObjectName) + if binding == nil || binding.Type == nil { + return "", false + } + className := normaliseTypeName(binding.Type.TypeFQN) + if className == "" { + return "", false + } + return findMethodOnClass(className, cs.FunctionName, registry) +} + +// lookupThisMethod handles `this->method()` by deriving the caller's +// enclosing class from the caller node and then locating the method on +// that class. +func lookupThisMethod( + cs *CallSiteInternal, + callGraph *core.CallGraph, + registry *core.CppModuleRegistry, + classes map[string][]cppClassByteRange, +) (string, bool) { + caller, ok := callGraph.Functions[cs.CallerFQN] + if !ok { + return "", false + } + cls := enclosingCppClass(caller, classes) + if cls == nil { + return "", false + } + return findMethodOnClass(cls.name, cs.FunctionName, registry) +} + +// findMethodOnClass tries the registry's NamespaceIndex with each +// known qualifier prefix that could match a method on className. The +// canonical key is `Class::method` (no namespace), but classes living +// in a namespace appear as `ns::Class::method`; we scan the index +// values keyed on the bare-class form and fall back to a structural +// suffix match when neither key shape exists yet. +func findMethodOnClass( + className, methodName string, + registry *core.CppModuleRegistry, +) (string, bool) { + bareKey := className + fqnSeparator + methodName + if fqn, ok := registry.NamespaceIndex[bareKey]; ok { + return fqn, true + } + + // `Class::method` may have been registered as `ns::Class::method` + // because the class lives inside a namespace. Scan the namespace + // index for any key whose tail matches `Class::method`. + suffix := fqnSeparator + bareKey + for key, fqn := range registry.NamespaceIndex { + if strings.HasSuffix(key, suffix) { + return fqn, true + } + } + return "", false +} + +// normaliseTypeName strips C++ qualifiers (`const`, `volatile`) and +// pointer/reference suffixes (`*`, `**`, `&`, `&&`) from a type +// expression so it can be matched against the bare class names stored +// in the registry's ClassIndex. +// +// Templates are left intact (`std::vector` stays +// `std::vector`); resolving template instantiations is a Phase 2 +// concern. Nested namespace prefixes (`ns::Type`) are also left as-is — +// the registry's NamespaceIndex stores the same form. +func normaliseTypeName(raw string) string { + t := strings.TrimSpace(raw) + for _, p := range pointerPrefixes { + t = strings.TrimPrefix(t, p) + } + t = strings.TrimSpace(t) + for _, s := range pointerSuffixes { + for strings.HasSuffix(t, s) { + t = strings.TrimSuffix(t, s) + t = strings.TrimSpace(t) + } + } + return t +} diff --git a/sast-engine/graph/callgraph/builder/cpp_builder_test.go b/sast-engine/graph/callgraph/builder/cpp_builder_test.go new file mode 100644 index 00000000..deaef4af --- /dev/null +++ b/sast-engine/graph/callgraph/builder/cpp_builder_test.go @@ -0,0 +1,506 @@ +package builder_test + +import ( + "testing" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/builder" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// cppFixture builds a CodeGraph + CppModuleRegistry + type engine for +// driving BuildCppCallGraph in unit tests. Tests describe the program +// shape via addClass / addMethod / addFreeFunction / addCall helpers; +// the registry's FileToPrefix is updated automatically so cross-file +// resolution matches what BuildCppModuleRegistry would have produced. +type cppFixture struct { + cg *graph.CodeGraph + registry *core.CppModuleRegistry + engine *resolution.CppTypeInferenceEngine +} + +const cppFixtureRoot = "/projects/cppapp" + +func newCppFixture(t *testing.T) *cppFixture { + t.Helper() + registry := core.NewCppModuleRegistry(cppFixtureRoot) + return &cppFixture{ + cg: graph.NewCodeGraph(), + registry: registry, + engine: resolution.NewCppTypeInferenceEngine(registry), + } +} + +func (f *cppFixture) addClass(t *testing.T, file, relPath, packageName, name string, startByte, endByte uint32) { + t.Helper() + node := &graph.Node{ + ID: "class:" + relPath + "::" + packageName + "::" + name, + Type: "class_declaration", + Name: name, + File: file, + Language: "cpp", + PackageName: packageName, + LineNumber: 1, + SourceLocation: &graph.SourceLocation{File: file, StartByte: startByte, EndByte: endByte}, + } + f.cg.AddNode(node) + f.registry.FileToPrefix[file] = relPath + scope := packageName + if scope != "" { + scope = scope + "::" + name + } else { + scope = name + } + f.registry.ClassIndex[name] = append(f.registry.ClassIndex[name], relPath+"::"+scope) +} + +// addMethod adds a method_declaration anchored at the given byte offset +// inside (or outside) any class declared via addClass. The startByte +// is what enclosingCppClass uses to associate the method with its class. +func (f *cppFixture) addMethod(t *testing.T, file, relPath, packageName, name, returnType string, startByte, endByte uint32) *graph.Node { + t.Helper() + node := &graph.Node{ + ID: "method:" + relPath + "::" + packageName + "::" + name, + Type: "method_declaration", + Name: name, + File: file, + Language: "cpp", + PackageName: packageName, + ReturnType: returnType, + LineNumber: 1, + SourceLocation: &graph.SourceLocation{File: file, StartByte: startByte, EndByte: endByte}, + } + f.cg.AddNode(node) + f.registry.FileToPrefix[file] = relPath + return node +} + +// addFreeFunction adds a function_definition outside any class. +// packageName may be "" (top-level) or a namespace name. +func (f *cppFixture) addFreeFunction(t *testing.T, file, relPath, packageName, name, returnType string) *graph.Node { + t.Helper() + node := &graph.Node{ + ID: "fn:" + relPath + "::" + packageName + "::" + name, + Type: "function_definition", + Name: name, + File: file, + Language: "cpp", + PackageName: packageName, + ReturnType: returnType, + LineNumber: 1, + SourceLocation: &graph.SourceLocation{File: file, StartByte: 9000, EndByte: 9100}, + } + f.cg.AddNode(node) + f.registry.FileToPrefix[file] = relPath + return node +} + +// addCall mimics the parser's edge from a containing function to a +// call_expression. metadata fields are sparse so tests can exercise +// the qualified / method / receiver branches. +func (f *cppFixture) addCall(t *testing.T, caller *graph.Node, target string, receiver string) { + t.Helper() + metadata := map[string]any{} + if receiver != "" { + metadata["is_method"] = true + metadata["receiver"] = receiver + } + call := &graph.Node{ + ID: "call:" + caller.ID + "->" + target + "@" + receiver, + Type: "call_expression", + Name: target, + File: caller.File, + Language: "cpp", + LineNumber: caller.LineNumber + 1, + Metadata: metadata, + } + f.cg.AddNode(call) + f.cg.AddEdge(caller, call) +} + +func (f *cppFixture) build(t *testing.T) *core.CallGraph { + t.Helper() + cg, err := builder.BuildCppCallGraph(f.cg, f.registry, f.engine) + require.NoError(t, err) + require.NotNil(t, cg) + return cg +} + +func TestBuildCppCallGraph_NilInputs(t *testing.T) { + cg, err := builder.BuildCppCallGraph(nil, nil, nil) + require.NoError(t, err) + require.NotNil(t, cg) + assert.Empty(t, cg.Functions) + assert.Empty(t, cg.Edges) +} + +// TestBuildCppCallGraph_NamespaceQualifiedCall covers `ns::func()` — +// the call's Name already carries the qualifier, so resolution is a +// direct NamespaceIndex lookup. +func TestBuildCppCallGraph_NamespaceQualifiedCall(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + utilsCpp := root + "/src/utils.cpp" + + f := newCppFixture(t) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + f.addFreeFunction(t, utilsCpp, "src/utils.cpp", "mylib", "process", "void") + f.addCall(t, main, "mylib::process", "") + + cg := f.build(t) + + assert.Equal(t, []string{"src/utils.cpp::mylib::process"}, cg.Edges["src/main.cpp::main"]) + sites := cg.CallSites["src/main.cpp::main"] + require.Len(t, sites, 1) + assert.True(t, sites[0].Resolved) + assert.Equal(t, "src/utils.cpp::mylib::process", sites[0].TargetFQN) +} + +// TestBuildCppCallGraph_StaticMethodViaNamespaceIndex covers +// `ClassName::staticMethod()` — the call's Name already contains the +// `::` and resolves via NamespaceIndex without needing a class lookup. +func TestBuildCppCallGraph_StaticMethodViaNamespaceIndex(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + socketCpp := root + "/src/socket.cpp" + + f := newCppFixture(t) + f.addClass(t, socketCpp, "src/socket.cpp", "", "Socket", 0, 1000) + f.addMethod(t, socketCpp, "src/socket.cpp", "", "create", "Socket*", 100, 200) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + f.addCall(t, main, "Socket::create", "") + + cg := f.build(t) + + assert.Equal(t, []string{"src/socket.cpp::Socket::create"}, cg.Edges["src/main.cpp::main"]) +} + +// TestBuildCppCallGraph_MethodOnTypedReceiver covers `obj.method()` +// where `obj` was declared as `Dog*` inside the caller. Resolution +// goes through the type engine to discover `Dog`, then through +// NamespaceIndex to find `Dog::speak`. +func TestBuildCppCallGraph_MethodOnTypedReceiver(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + animalCpp := root + "/src/animal.cpp" + + f := newCppFixture(t) + f.addClass(t, animalCpp, "src/animal.cpp", "mylib", "Dog", 0, 1000) + f.addMethod(t, animalCpp, "src/animal.cpp", "mylib", "speak", "void", 100, 200) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + + // The call site is obj.speak(); register the receiver type via the + // type engine so resolution can find it. + f.engine.ExtractVariableType("src/main.cpp::main", "dog", "Dog*", resolution.Location{Line: 5}) + f.addCall(t, main, "speak", "dog") + + cg := f.build(t) + + assert.Equal(t, []string{"src/animal.cpp::mylib::Dog::speak"}, cg.Edges["src/main.cpp::main"]) + sites := cg.CallSites["src/main.cpp::main"] + require.Len(t, sites, 1) + assert.True(t, sites[0].Resolved) +} + +// TestBuildCppCallGraph_ThisMethodCall covers `this->method()` inside +// a method body — the receiver type is implicit (the caller's +// enclosing class). +func TestBuildCppCallGraph_ThisMethodCall(t *testing.T) { + root := cppFixtureRoot + socketCpp := root + "/src/socket.cpp" + + f := newCppFixture(t) + f.addClass(t, socketCpp, "src/socket.cpp", "", "Socket", 0, 1000) + connect := f.addMethod(t, socketCpp, "src/socket.cpp", "", "connect", "void", 100, 300) + f.addMethod(t, socketCpp, "src/socket.cpp", "", "disconnect", "void", 400, 500) + f.addCall(t, connect, "disconnect", "this") + + cg := f.build(t) + + assert.Equal(t, []string{"src/socket.cpp::Socket::disconnect"}, + cg.Edges["src/socket.cpp::Socket::connect"]) +} + +// TestBuildCppCallGraph_FreeFunctionFallthrough verifies that plain +// (non-qualified, non-method) calls fall through to the C-style +// resolution and find a free C++ function in the same file. +func TestBuildCppCallGraph_FreeFunctionFallthrough(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + + f := newCppFixture(t) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "helper", "void") + f.addCall(t, main, "helper", "") + + cg := f.build(t) + + assert.Equal(t, []string{"src/main.cpp::helper"}, cg.Edges["src/main.cpp::main"]) +} + +// TestBuildCppCallGraph_StdlibCallUnresolved confirms unknown calls +// (e.g. `printf`) are recorded as Resolved:false rather than dropped. +func TestBuildCppCallGraph_StdlibCallUnresolved(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + + f := newCppFixture(t) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + f.addCall(t, main, "printf", "") + + cg := f.build(t) + assert.Empty(t, cg.Edges["src/main.cpp::main"]) + sites := cg.CallSites["src/main.cpp::main"] + require.Len(t, sites, 1) + assert.False(t, sites[0].Resolved) + assert.NotEmpty(t, sites[0].FailureReason) +} + +// TestBuildCppCallGraph_ReceiverNotInScope falls back gracefully when +// the receiver variable was not registered with the type engine. +func TestBuildCppCallGraph_ReceiverNotInScope(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + + f := newCppFixture(t) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + // dog.speak() but no type registered for `dog`. + f.addCall(t, main, "speak", "dog") + + cg := f.build(t) + assert.Empty(t, cg.Edges["src/main.cpp::main"], "missing receiver type must not produce an edge") + require.Len(t, cg.CallSites["src/main.cpp::main"], 1) + assert.False(t, cg.CallSites["src/main.cpp::main"][0].Resolved) +} + +// TestBuildCppCallGraph_MethodReturnTypeRegistered verifies Pass 2 +// records class method return types on the type engine. +func TestBuildCppCallGraph_MethodReturnTypeRegistered(t *testing.T) { + root := cppFixtureRoot + socketCpp := root + "/src/socket.cpp" + + f := newCppFixture(t) + f.addClass(t, socketCpp, "src/socket.cpp", "", "Socket", 0, 1000) + f.addMethod(t, socketCpp, "src/socket.cpp", "", "is_open", "bool", 100, 200) + f.addMethod(t, socketCpp, "src/socket.cpp", "", "destroy", "void", 300, 400) // dropped + f.build(t) + + got := f.engine.GetMethodReturnType("Socket", "is_open") + require.NotNil(t, got) + assert.Equal(t, "bool", got.TypeFQN) + assert.Nil(t, f.engine.GetMethodReturnType("Socket", "destroy"), "void method must not be registered") +} + +// TestBuildCppCallGraph_FieldTypeRegistered verifies Pass 2 records +// class field types on the type engine. +func TestBuildCppCallGraph_FieldTypeRegistered(t *testing.T) { + root := cppFixtureRoot + socketCpp := root + "/src/socket.cpp" + + f := newCppFixture(t) + f.addClass(t, socketCpp, "src/socket.cpp", "", "Socket", 0, 1000) + field := &graph.Node{ + ID: "field:port", + Type: "field_declaration", + Name: "port", + Language: "cpp", + File: socketCpp, + DataType: "int", + SourceLocation: &graph.SourceLocation{File: socketCpp, StartByte: 50, EndByte: 60}, + } + f.cg.AddNode(field) + + f.build(t) + got := f.engine.GetFieldType("Socket", "port") + require.NotNil(t, got) + assert.Equal(t, "int", got.TypeFQN) +} + +// TestBuildCppCallGraph_NormaliseTypeName covers the receiver-type +// stripping logic by exercising every common qualifier shape. +func TestBuildCppCallGraph_NormaliseTypeName(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + animalCpp := root + "/src/animal.cpp" + + cases := []struct { + name string + typeStr string + varName string + expectedFQN string + }{ + {"plain pointer", "Dog*", "dog", "src/animal.cpp::Dog::speak"}, + {"const pointer", "const Dog*", "dog", "src/animal.cpp::Dog::speak"}, + {"reference", "Dog&", "dog", "src/animal.cpp::Dog::speak"}, + {"double pointer", "Dog**", "dog", "src/animal.cpp::Dog::speak"}, + {"rvalue reference", "Dog&&", "dog", "src/animal.cpp::Dog::speak"}, + {"value with whitespace", " Dog ", "dog", "src/animal.cpp::Dog::speak"}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + f := newCppFixture(t) + f.addClass(t, animalCpp, "src/animal.cpp", "", "Dog", 0, 1000) + f.addMethod(t, animalCpp, "src/animal.cpp", "", "speak", "void", 100, 200) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + + f.engine.ExtractVariableType("src/main.cpp::main", tc.varName, tc.typeStr, resolution.Location{Line: 1}) + f.addCall(t, main, "speak", tc.varName) + + cg := f.build(t) + require.Len(t, cg.Edges["src/main.cpp::main"], 1) + assert.Equal(t, tc.expectedFQN, cg.Edges["src/main.cpp::main"][0]) + }) + } +} + +// TestBuildCppCallGraph_NestedClasses confirms enclosingCppClass picks +// the innermost class when classes are nested. +func TestBuildCppCallGraph_NestedClasses(t *testing.T) { + root := cppFixtureRoot + src := root + "/src/nested.cpp" + + f := newCppFixture(t) + f.addClass(t, src, "src/nested.cpp", "", "Outer", 0, 1000) + f.addClass(t, src, "src/nested.cpp", "", "Inner", 100, 500) + // Method anchored inside the inner class' range. + inner := f.addMethod(t, src, "src/nested.cpp", "", "ping", "void", 200, 300) + main := f.addFreeFunction(t, root+"/src/main.cpp", "src/main.cpp", "", "main", "int") + f.addCall(t, main, "Inner::ping", "") + + cg := f.build(t) + assert.Equal(t, []string{"src/nested.cpp::Inner::ping"}, cg.Edges["src/main.cpp::main"]) + _ = inner +} + +// TestBuildCppCallGraph_MergeIntoUnifiedGraph confirms a built C++ +// call graph merges cleanly into an empty destination, replicating +// what cmd/scan.go will do when assembling the unified graph. +func TestBuildCppCallGraph_MergeIntoUnifiedGraph(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + + f := newCppFixture(t) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "helper", "void") + f.addCall(t, main, "helper", "") + + src := f.build(t) + dst := core.NewCallGraph() + builder.MergeCallGraphs(dst, src) + + assert.Contains(t, dst.Functions, "src/main.cpp::main") + assert.Contains(t, dst.Functions, "src/main.cpp::helper") + assert.Equal(t, []string{"src/main.cpp::helper"}, dst.Edges["src/main.cpp::main"]) +} + +// TestBuildCppCallGraph_FindMethodOnClass_NamespacedSuffixMatch +// covers the suffix fallback in findMethodOnClass: a class lives in a +// namespace and the call site has only the bare class name. +func TestBuildCppCallGraph_FindMethodOnClass_NamespacedSuffixMatch(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + libCpp := root + "/src/lib.cpp" + + f := newCppFixture(t) + f.addClass(t, libCpp, "src/lib.cpp", "ns", "Foo", 0, 1000) + f.addMethod(t, libCpp, "src/lib.cpp", "ns", "bar", "void", 100, 200) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + + // Receiver registered as bare `Foo` (no namespace prefix). + f.engine.ExtractVariableType("src/main.cpp::main", "foo", "Foo*", resolution.Location{}) + f.addCall(t, main, "bar", "foo") + + cg := f.build(t) + assert.Equal(t, []string{"src/lib.cpp::ns::Foo::bar"}, cg.Edges["src/main.cpp::main"]) +} + +// TestBuildCppCallGraph_ReceiverBindingMissingType covers the +// defensive branch in lookupReceiverMethod: a binding exists for the +// receiver but Type is nil. This shouldn't happen for parser output +// but the resolver must not panic. +func TestBuildCppCallGraph_ReceiverBindingMissingType(t *testing.T) { + root := cppFixtureRoot + mainCpp := root + "/src/main.cpp" + + f := newCppFixture(t) + main := f.addFreeFunction(t, mainCpp, "src/main.cpp", "", "main", "int") + + // Manually inject a binding with no Type. + scope := resolution.NewCFunctionScope("src/main.cpp::main") + scope.AddVariable(&resolution.CVariableBinding{VarName: "obj", Type: nil}) + f.engine.AddScope(scope) + + f.addCall(t, main, "method", "obj") + + cg := f.build(t) + assert.Empty(t, cg.Edges["src/main.cpp::main"]) + require.Len(t, cg.CallSites["src/main.cpp::main"], 1) + assert.False(t, cg.CallSites["src/main.cpp::main"][0].Resolved) +} + +// TestBuildCppCallGraph_ThisOutsideKnownCaller covers the defensive +// branch in lookupThisMethod where the call's CallerFQN is not in +// callGraph.Functions. The resolver must skip the `this` path and +// fall through cleanly. +func TestBuildCppCallGraph_ThisOutsideKnownCaller(t *testing.T) { + root := cppFixtureRoot + socketCpp := root + "/src/socket.cpp" + + f := newCppFixture(t) + // A free function (not a method) calling `this->method()` is + // nonsensical in C++ but exercises the `caller not a class member` + // branch of lookupThisMethod via byte-range miss. + main := f.addFreeFunction(t, socketCpp, "src/socket.cpp", "", "main", "int") + f.addCall(t, main, "stray", "this") + + cg := f.build(t) + assert.Empty(t, cg.Edges["src/socket.cpp::main"]) + require.Len(t, cg.CallSites["src/socket.cpp::main"], 1) + assert.False(t, cg.CallSites["src/socket.cpp::main"][0].Resolved) +} + +// TestBuildCppCallGraph_DeclarationStillRegistersClassMethod verifies +// Pass 2 registers method return types from declarations even though +// declarations themselves don't get return-type entries on the function +// map (the C builder skips them) — class method tracking is needed for +// Pass 4 receiver resolution to work on header-declared methods. +func TestBuildCppCallGraph_DeclarationStillRegistersClassMethod(t *testing.T) { + root := cppFixtureRoot + socketHpp := root + "/include/socket.hpp" + + f := newCppFixture(t) + f.addClass(t, socketHpp, "include/socket.hpp", "", "Socket", 0, 1000) + // Declaration: no body, marked is_declaration. + decl := f.addMethod(t, socketHpp, "include/socket.hpp", "", "is_open", "bool", 100, 200) + decl.Metadata = map[string]any{"is_declaration": true} + + f.build(t) + got := f.engine.GetMethodReturnType("Socket", "is_open") + require.NotNil(t, got) + assert.Equal(t, "bool", got.TypeFQN) +} + +// TestBuildCppCallGraph_IgnoresNonCppNodes verifies the language +// filter — Python or C nodes must not enter the C++ call graph. +func TestBuildCppCallGraph_IgnoresNonCppNodes(t *testing.T) { + root := cppFixtureRoot + cppFile := root + "/src/main.cpp" + cFile := root + "/src/legacy.c" + + f := newCppFixture(t) + f.addFreeFunction(t, cppFile, "src/main.cpp", "", "main", "int") + f.cg.AddNode(&graph.Node{ + Type: "function_definition", + Name: "legacy_init", + File: cFile, + Language: "c", + }) + + cg := f.build(t) + assert.Contains(t, cg.Functions, "src/main.cpp::main") + assert.NotContains(t, cg.Functions, "src/legacy.c::legacy_init") +}