Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions engine/ast_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import os

import tree_sitter_javascript as tsjavascript
import tree_sitter_python as tspython
import tree_sitter_typescript as tstypescript
from tree_sitter import Language, Parser

# 1. Initialize the Language Grammars
# Each tree_sitter_* package exposes its grammar through a language function.
# The result is wrapped in a Language object once, at import time, so the
# module-level constants below can be shared by every parser built later.
LANG_PY = Language(tspython.language())
LANG_JS = Language(tsjavascript.language())
# tree_sitter_typescript has two separate grammar entry points: one for plain
# TypeScript and one for TSX.
LANG_TS = Language(tstypescript.language_typescript())
LANG_TSX = Language(tstypescript.language_tsx())


def get_parser(extension: str):
    """Return a Tree-Sitter parser for the given file extension.

    Args:
        extension: File extension including the leading dot (e.g. ``".py"``).
            Matching is exact and case-sensitive.

    Returns:
        A new ``Parser`` configured with the matching grammar, or ``None``
        when the extension is not supported.
    """
    grammars = {
        ".py": LANG_PY,
        ".js": LANG_JS,
        ".jsx": LANG_JS,  # JSX is handled by the JavaScript grammar
        ".ts": LANG_TS,
        ".tsx": LANG_TSX,
    }
    language = grammars.get(extension)
    if language is None:
        # Unsupported extension: bail out before constructing any parser.
        return None
    return Parser(language)


# Node types whose signature line(s) are emitted into the stub.
_SIGNATURE_NODE_TYPES = frozenset(
    {
        "class_definition",
        "function_definition",  # Python
        "class_declaration",
        "function_declaration",
        "method_definition",  # JS/TS
        "lexical_declaration",  # JS/TS const arrow functions
    }
)
# Class-like nodes: their bodies are searched for nested members.
_CLASS_NODE_TYPES = frozenset({"class_definition", "class_declaration"})
# Node types that mark where a signature ends and the body begins.
_BODY_NODE_TYPES = frozenset({"block", "statement_block", "class_body"})
# Wrapper nodes that sit between a ``const`` declaration and its arrow body.
_BODY_WRAPPER_NODE_TYPES = frozenset({"variable_declarator", "arrow_function"})


def _find_body(node):
    """Return the first body node under *node*, or ``None`` if there is none.

    Only direct children are inspected, except that variable declarators and
    arrow functions are descended into so ``const f = () => { ... }`` resolves
    to the arrow function's statement block.
    """
    for child in node.children:
        if child.type in _BODY_NODE_TYPES:
            return child
        if child.type in _BODY_WRAPPER_NODE_TYPES:
            found = _find_body(child)
            if found is not None:
                return found
    return None


def extract_signatures(file_path: str) -> str:
    """Parse a file and return only its structural signatures.

    For every class, function, method, or ``const`` arrow function found, the
    source text from the start of the definition up to (but excluding) its
    body is emitted on its own entry, followed by ``" ..."``. Members of a
    class are emitted after the class, indented one level deeper. Function
    bodies are not searched for nested definitions.

    Args:
        file_path: Path of the source file; the extension selects the grammar.

    Returns:
        The signatures joined by newlines, or ``""`` when the extension is
        unsupported, the file cannot be read as UTF-8, or nothing matched.
    """
    ext = os.path.splitext(file_path)[1].lower()
    parser = get_parser(ext)
    if parser is None:
        return ""

    try:
        with open(file_path, encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError):
        # Missing/unreadable file, or bytes that are not valid UTF-8
        # (UnicodeDecodeError is a ValueError): treat as "no signatures".
        return ""

    # Tree-Sitter reports byte offsets, so slicing must happen on bytes.
    raw_bytes = content.encode("utf-8")
    tree = parser.parse(raw_bytes)
    stubs = []

    # Explicit stack instead of recursion: deeply nested syntax trees must not
    # be able to exhaust Python's recursion limit. Children are pushed in
    # reverse so nodes are still visited in source (pre-order) order.
    pending = [(tree.root_node, 0)]
    while pending:
        node, depth = pending.pop()

        if node.type in _SIGNATURE_NODE_TYPES:
            body = _find_body(node)
            if body is not None:
                # Keep everything before the body; drop the body itself.
                sig_bytes = raw_bytes[node.start_byte : body.start_byte]
                signature = sig_bytes.decode("utf-8").strip()

                # Defensive: drop an opening brace if one slipped into the slice.
                if signature.endswith("{"):
                    signature = signature[:-1].strip()

                indent = " " * depth
                stubs.append(f"{indent}{signature} ...")

                # Only class bodies are searched further, to list their members.
                if node.type in _CLASS_NODE_TYPES:
                    pending.extend(
                        (child, depth + 1) for child in reversed(body.children)
                    )
                continue

        # Not a signature node (or one without a body): keep searching below it.
        pending.extend((child, depth) for child in reversed(node.children))

    return "\n".join(stubs)


def generate_project_stub(directory: str) -> str:
    """Walk a directory tree and return a combined map of file signatures.

    Directories and files are visited in sorted order so the result is the
    same on every run regardless of filesystem ordering.

    Args:
        directory: Root directory to scan. A path that does not exist yields
            an empty result.

    Returns:
        One section per source file that produced signatures, each headed by
        ``--- <path relative to directory> ---``, joined by newlines. Returns
        ``""`` when no file produced any signature.
    """
    ignored_dirs = {".git", "node_modules", ".venv", "__pycache__", "dist", "build"}
    source_suffixes = (".py", ".ts", ".tsx", ".js", ".jsx")
    project_stub = []

    for root, dirs, files in os.walk(directory):
        # Assign in place so os.walk skips ignored directories entirely;
        # sorting fixes the order in which the remaining ones are descended.
        dirs[:] = sorted(d for d in dirs if d not in ignored_dirs)

        for file in sorted(files):
            # Case-insensitive match, in line with the lowercased extension
            # used when choosing a grammar.
            if not file.lower().endswith(source_suffixes):
                continue

            file_path = os.path.join(root, file)
            signatures = extract_signatures(file_path)
            if signatures:
                rel_path = os.path.relpath(file_path, directory)
                project_stub.append(f"--- {rel_path} ---\n{signatures}\n")

    return "\n".join(project_stub)
11 changes: 11 additions & 0 deletions engine/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
import sys

from engine.ast_parser import generate_project_stub
from engine.llm import SMART_ROUTING, LLMClient
from engine.tools import (
AGENTS_DIR,
Expand Down Expand Up @@ -45,6 +46,16 @@ def assemble_context(agent_name):
public_dir = os.path.join(BASE_DIR, "public")
ui_components_dir = os.path.join(BASE_DIR, "src", "web", "components", "ui")

# --- SHIFT-LEFT: AST OMNISCIENCE INJECTION ---
try:
src_dir = os.path.join(BASE_DIR, "src")
if os.path.exists(src_dir):
ast_map = generate_project_stub(src_dir)
context += f"\n\n--- PROJECT ARCHITECTURE SKELETON (AST MAP) ---\n{ast_map}\n"
except Exception as e:
print(f"⚠️ Warning: AST Parsing failed. {e}")
# ---------------------------------------------

if "Strategy" in agent_name:
context += read_file(os.path.join(DOCS_DIR, "company", "thesis.md"))
feedback_log_path = os.path.join(DOCS_DIR, "company", "feedback_log.md")
Expand Down
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ dependencies = [
"fastapi>=0.110.0",
"uvicorn>=0.27.0",
"litellm>=1.0.0",
"tree-sitter>=0.25.2",
"tree-sitter-python>=0.25.0",
"tree-sitter-javascript>=0.25.0",
"tree-sitter-typescript>=0.23.2",
]

[dependency-groups]
Expand Down Expand Up @@ -44,4 +48,4 @@ markers = [
"core: core system functionality",
"integration: testing 3rd party APIs",
"eval: AI evaluation tests (makes real LLM API calls, costs money)",
]
]
97 changes: 97 additions & 0 deletions tests/engine/test_ast_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from engine.ast_parser import extract_signatures, generate_project_stub, get_parser


def test_get_parser():
    """Check that supported extensions yield a parser and others yield None."""
    for supported in (".py", ".js", ".tsx", ".ts"):
        assert get_parser(supported) is not None

    # Anything outside the supported set must map to None.
    assert get_parser(".txt") is None


def test_extract_signatures_python(tmp_path):
    """Ensure Python class and function signatures are kept and bodies dropped."""
    py_file = tmp_path / "main.py"
    # Indentation inside the fixture is significant: the method body has to be
    # nested one level deeper than its ``def`` for the source to be valid Python.
    py_file.write_text(
        "class Forge:\n"
        "    def build(self):\n"
        "        print('Building...')\n"
        "        return True\n"
        "\n"
        "def helper():\n"
        "    pass\n",
        encoding="utf-8",
    )

    result = extract_signatures(str(py_file))

    assert "class Forge:" in result
    assert "def build(self):" in result
    assert "def helper():" in result
    # Statements from the method body must not leak into the stub.
    assert "print('Building...')" not in result
    assert "return True" not in result


def test_extract_signatures_typescript(tmp_path):
    """Check that TS classes, methods and const arrow functions are extracted."""
    source = (
        "export class Engine {\n"
        " start() {\n"
        " console.log('Vroom');\n"
        " }\n"
        "}\n"
        "\n"
        "export const helper = () => {\n"
        " return false;\n"
        "};\n"
    )
    ts_file = tmp_path / "app.ts"
    ts_file.write_text(source, encoding="utf-8")

    result = extract_signatures(str(ts_file))

    # 'export' is a wrapper node around the declaration, so the emitted
    # signature starts at the declaration itself.
    for fragment in ("class Engine", "start()", "const helper = () =>"):
        assert fragment in result

    # Body statements must be stripped.
    assert "console.log('Vroom')" not in result


def test_extract_signatures_invalid_file(tmp_path):
    """A path that does not exist yields an empty result instead of raising."""
    missing = tmp_path / "does_not_exist.py"
    result = extract_signatures(str(missing))
    assert result == ""


def test_generate_project_stub(tmp_path):
    """Check that a directory tree is scanned and ignored folders are skipped."""
    # Source files that should appear in the stub.
    fixtures = {
        "api.py": "def fetch_data():\n pass\n",
        "ui.tsx": "export function Button() {\n return null;\n}\n",
    }
    src_dir = tmp_path / "src"
    src_dir.mkdir()
    for name, body in fixtures.items():
        (src_dir / name).write_text(body, encoding="utf-8")

    # A file inside an ignored directory that must never be reported.
    ignored = tmp_path / "node_modules"
    ignored.mkdir()
    (ignored / "junk.js").write_text("function ignoreMe() {}", encoding="utf-8")

    result = generate_project_stub(str(tmp_path))

    # Headers for the real files, followed by their extracted signatures
    # (the 'export' wrapper is not part of the emitted declaration).
    expected_fragments = (
        "--- src",
        "api.py",
        "ui.tsx",
        "def fetch_data():",
        "function Button()",
    )
    for fragment in expected_fragments:
        assert fragment in result

    assert "ignoreMe" not in result
Loading
Loading