Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions sast-engine/graph/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -589,15 +589,18 @@ func TestInitializeWithNonJavaFiles(t *testing.T) {
t.Fatal("Initialize returned nil graph")
}

expectedNodeCount := 1 // Only one Java file
if len(graph.Nodes) != expectedNodeCount {
t.Errorf("Expected %d node, but got %d", expectedNodeCount, len(graph.Nodes))
}

// .java and .cpp are both parsed; only the .txt is ignored.
// The Java file produces a class_declaration; the C++ file produces a
// function_definition for `main`.
byType := map[string]int{}
for _, node := range graph.Nodes {
if node.Type != "class_declaration" {
t.Errorf("Expected node type to be 'class', but got '%s'", node.Type)
}
byType[node.Type]++
}
if byType["class_declaration"] == 0 {
t.Errorf("expected at least one class_declaration node from File1.java; got %v", byType)
}
if byType["function_definition"] == 0 {
t.Errorf("expected at least one function_definition node from File3.cpp; got %v", byType)
}
}

Expand Down Expand Up @@ -1043,23 +1046,23 @@ func TestBuildGraphFromASTPythonFunctionDefinition(t *testing.T) {
name: "Function with parameters",
sourceCode: `def add(x, y):
return x + y`,
expectedNodeCount: 2, // function + return
expectedNodeCount: 1, // function (Python parsers do not yet emit a Node for return inside this fixture)
expectedName: "add",
expectedParams: []string{"x", "y"},
},
{
name: "Method with self parameter",
sourceCode: `def method(self, arg1, arg2):
self.value = arg1`,
expectedNodeCount: 2, // function + assignment
expectedNodeCount: 1, // function only — assignment recursion below the function does not produce a graph.Node here
expectedName: "method",
expectedParams: []string{"self", "arg1", "arg2"},
},
{
name: "Function with default parameters",
sourceCode: `def func_with_defaults(x, y=10, z=20):
return x + y + z`,
expectedNodeCount: 2, // function + return
expectedNodeCount: 1, // function only
expectedName: "func_with_defaults",
expectedParams: []string{"x", "y=10", "z=20"}, // Parser captures default values
},
Expand Down Expand Up @@ -1157,7 +1160,7 @@ func TestBuildGraphFromASTPythonClassDefinition(t *testing.T) {
sourceCode: `class MyClass:
def my_method(self):
return 42`,
expectedNodeCount: 3, // class + method + return
expectedNodeCount: 2, // class + method
expectedClassName: "MyClass",
expectedBases: []string{},
},
Expand All @@ -1166,7 +1169,7 @@ func TestBuildGraphFromASTPythonClassDefinition(t *testing.T) {
sourceCode: `class Person:
def __init__(self, name):
self.name = name`,
expectedNodeCount: 3, // class + __init__ + assignment
expectedNodeCount: 2, // class + __init__
expectedClassName: "Person",
expectedBases: []string{},
},
Expand Down
120 changes: 100 additions & 20 deletions sast-engine/graph/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ func buildGraphFromAST(node *sitter.Node, sourceCode []byte, graph *CodeGraph, c
// language. C/C++ branches come first because the dispatcher is the
// only place these node types are handled.
case "function_definition":
if isCFile {
switch {
case isCFile:
currentContext = parseCFunctionDefinition(node, sourceCode, graph, file)
} else if isPythonSourceFile {
case isCppFile:
currentContext = parseCppFunctionDefinition(node, sourceCode, graph, file, currentContext)
case isPythonSourceFile:
currentContext = parsePythonFunctionDefinition(node, sourceCode, graph, file, currentContext)
}

Expand Down Expand Up @@ -59,33 +62,50 @@ func buildGraphFromAST(node *sitter.Node, sourceCode []byte, graph *CodeGraph, c
parsePythonAssignment(node, sourceCode, graph, file, currentContext)
}

// Java-specific node types
// Java-specific node types. The C/C++ grammars emit several of these
// same node types (block, if/while/do/for, binary_expression) with
// different AST shapes, so the Java handlers must be gated by language —
// otherwise they pollute C/C++ graphs with Java-tagged nodes.
case "block":
parseBlockStatement(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
parseBlockStatement(node, sourceCode, graph, file, isJavaSourceFile)
}

case "yield_statement":
parseYieldStatement(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
parseYieldStatement(node, sourceCode, graph, file, isJavaSourceFile)
}

case "if_statement":
parseIfStatement(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
parseIfStatement(node, sourceCode, graph, file, isJavaSourceFile)
}
if isGoSourceFile {
parseGoIfStatement(node, sourceCode, graph, file)
}

case "while_statement":
parseWhileStatement(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
parseWhileStatement(node, sourceCode, graph, file, isJavaSourceFile)
}

case "do_statement":
parseDoStatement(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
parseDoStatement(node, sourceCode, graph, file, isJavaSourceFile)
}

case "for_statement":
parseForStatement(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
parseForStatement(node, sourceCode, graph, file, isJavaSourceFile)
}
if isGoSourceFile {
parseGoForStatement(node, sourceCode, graph, file)
}

case "binary_expression":
currentContext = parseJavaBinaryExpression(node, sourceCode, graph, file, isJavaSourceFile)
if isJavaSourceFile {
currentContext = parseJavaBinaryExpression(node, sourceCode, graph, file, isJavaSourceFile)
}

case "method_declaration":
if isJavaSourceFile {
Expand All @@ -98,14 +118,32 @@ func buildGraphFromAST(node *sitter.Node, sourceCode []byte, graph *CodeGraph, c
parseJavaMethodInvocation(node, sourceCode, graph, currentContext, file)

case "class_declaration":
parseJavaClassDeclaration(node, sourceCode, graph, file)
if isJavaSourceFile {
parseJavaClassDeclaration(node, sourceCode, graph, file)
}

case "block_comment":
parseJavaBlockComment(node, sourceCode, graph, file)
if isJavaSourceFile {
parseJavaBlockComment(node, sourceCode, graph, file)
}

case "local_variable_declaration", "field_declaration":
case "local_variable_declaration":
parseJavaVariableDeclaration(node, sourceCode, graph, file)

case "field_declaration":
// tree-sitter overloads field_declaration:
// - Java: class fields (handled by parseJavaVariableDeclaration)
// - C: struct fields (handled by parseCStructSpecifier via clike;
// the bare nodes here are siblings of an already-recorded
// struct, so we skip them to avoid duplicate nodes)
// - C++: data members AND inline method declarations inside a
// class body (handled by parseCppFieldDeclaration)
if isCppFile {
parseCppFieldDeclaration(node, sourceCode, graph, file, currentContext)
} else if !isCFile {
parseJavaVariableDeclaration(node, sourceCode, graph, file)
}

case "object_creation_expression":
parseJavaObjectCreation(node, sourceCode, graph, file)

Expand All @@ -121,28 +159,37 @@ func buildGraphFromAST(node *sitter.Node, sourceCode []byte, graph *CodeGraph, c
}

case "call_expression":
if isCFile {
switch {
case isCFile:
parseCCallExpression(node, sourceCode, graph, file, currentContext)
} else if isGoSourceFile {
case isCppFile:
parseCppCallExpression(node, sourceCode, graph, file, currentContext)
case isGoSourceFile:
parseGoCallExpression(node, sourceCode, graph, file, currentContext)
}

// C/C++ specific node types. struct_specifier appears in C only at the
// top level (C++ uses class_specifier for the equivalent construct);
// the remaining four are shared between C and C++.
// C and C++ shared node types — each language has its own parse
// function that sets the right Language tag and handles language-
// specific concerns (e.g. C++ struct inheritance via base_class_clause).
case "struct_specifier":
if isCFile {
parseCStructSpecifier(node, sourceCode, graph, file)
} else if isCppFile {
parseCppStructSpecifier(node, sourceCode, graph, file)
}

case "enum_specifier":
if isCFile || isCppFile {
if isCFile {
parseCEnumSpecifier(node, sourceCode, graph, file)
} else if isCppFile {
parseCppEnumSpecifier(node, sourceCode, graph, file)
}

case "type_definition":
if isCFile || isCppFile {
if isCFile {
parseCTypeDefinition(node, sourceCode, graph, file)
} else if isCppFile {
parseCppTypeDefinition(node, sourceCode, graph, file)
}

case "declaration":
Expand All @@ -155,6 +202,39 @@ func buildGraphFromAST(node *sitter.Node, sourceCode []byte, graph *CodeGraph, c
parseCLikeInclude(node, sourceCode, graph, file, isCppFile)
}

// C++-only node types. The handlers for class_specifier and
// namespace_definition return the new node, which the dispatcher stores in
// currentContext so the recursion picks up the surrounding scope for
// member resolution.
case "class_specifier":
if isCppFile {
currentContext = parseCppClassSpecifier(node, sourceCode, graph, file, currentContext)
}

case "namespace_definition":
if isCppFile {
currentContext = parseCppNamespaceDefinition(node, sourceCode, graph, file, currentContext)
}

case "template_declaration":
if isCppFile {
parseCppTemplateDeclaration(node, sourceCode, graph, file)
}

case "throw_statement":
if isCppFile {
parseCppThrowStatement(node, sourceCode, graph, file, currentContext)
}

case "try_statement":
if isCppFile {
parseCppTryStatement(node, sourceCode, graph, file, currentContext)
}

case "access_specifier":
if isCppFile {
recordAccessSpecifier(node, sourceCode, currentContext)
}

case "short_var_declaration":
if isGoSourceFile {
parseGoShortVarDeclaration(node, sourceCode, graph, file)
Expand Down
Loading
Loading