From c0a547460789e446bc2b61eb52058276535678c0 Mon Sep 17 00:00:00 2001 From: Daniel Casper Date: Tue, 5 May 2026 01:10:31 -0500 Subject: [PATCH 1/5] feat: force engineering to fix lint errors before handoff --- orchestrator.py | 48 ++++++++++++++++++++++++++++++++ pyproject.toml | 3 +- tests/evals/test_orchestrator.py | 42 ++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/orchestrator.py b/orchestrator.py index 5e7adb2..0e7cb6e 100644 --- a/orchestrator.py +++ b/orchestrator.py @@ -526,6 +526,42 @@ def extract_routing_queue(response_text): return None +def auto_lint_file(filepath): + """Zero-Cost Pre-Audit: Automatically lints files immediately after they are written.""" + abs_path = os.path.join(BASE_DIR, filepath) if not os.path.isabs(filepath) else filepath + ext = os.path.splitext(abs_path)[1] + + args = [] + if ext == ".py": + args = ["uv", "run", "ruff", "check", "--no-cache", abs_path] + elif ext in [".ts", ".tsx", ".js", ".jsx"]: + # Forge uses Biome for JS/TS + args = ["npx", "biome", "check", abs_path] + else: + return None # No auto-linter for this file type + + # Cross-Platform Executable Resolution (Windows Support) + if os.name == "nt": + executable = shutil.which(args[0]) + if executable: + args[0] = executable + + try: + result = subprocess.run( + args, + capture_output=True, + text=True, + encoding="utf-8", + timeout=30, + shell=False + ) + if result.returncode != 0: + return f"[⚠️ AUTO-LINT FAILED on {filepath}]:\n{result.stdout}\n{result.stderr}\nFix this syntax error before proceeding." + return f"[✅ AUTO-LINT PASSED for {filepath}]" + except Exception as e: + return f"[⚠️ AUTO-LINT EXECUTION ERROR on {filepath}]: {e}" + + def execute_autonomous_actions(response_text): """Scans the AI's response for a JSON payload and executes the sandbox tools.""" # Look for a JSON block explicitly tagged for the OS @@ -547,6 +583,12 @@ def execute_autonomous_actions(response_text): if path and content: result = write_file(path, content) execution_logs.append(result) + + # --- SHIFT-LEFT: FORGE AUTO-LINTING --- + if "SUCCESS" in result: + lint_result = auto_lint_file(path) + if lint_result: + execution_logs.append(lint_result) # 1.5 Execute File Appends (Scalpel) if "append_to_file" in payload: @@ -556,6 +598,12 @@ def execute_autonomous_actions(response_text): if path and content: result = append_file(path, content) execution_logs.append(result) + + # --- SHIFT-LEFT: FORGE AUTO-LINTING --- + if "SUCCESS" in result: + lint_result = auto_lint_file(path) + if lint_result: + execution_logs.append(lint_result) # 2. Execute Shell Commands (Testing/Linting) if "run_commands" in payload: diff --git a/pyproject.toml b/pyproject.toml index 479320a..724b5ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dev = [ "pytest-cov>=5.0.0", "ruff>=0.3.0", "httpx>=0.27.0", + "pytest-mock>=3.15.1", ] [tool.ruff] @@ -41,4 +42,4 @@ markers = [ "core: core system functionality", "integration: testing 3rd party APIs", "eval: AI evaluation tests (makes real LLM API calls, costs money)", -] \ No newline at end of file +] diff --git a/tests/evals/test_orchestrator.py b/tests/evals/test_orchestrator.py index 72969f3..c843e49 100644 --- a/tests/evals/test_orchestrator.py +++ b/tests/evals/test_orchestrator.py @@ -206,3 +206,45 @@ def test_log_jsonl_telemetry(tmp_path, monkeypatch) -> None: assert data["agent"] == "Engineering" assert data["response"] == "response_text" assert data["prompt_tokens"] == 10 + + +def test_auto_lint_file_python_success(mocker): + """Ensure the Forge auto-linter correctly triggers ruff for Python files and passes.""" + from orchestrator import auto_lint_file + + mock_run = mocker.patch("subprocess.run") + # Simulate a successful ruff check (exit code 0) + mock_run.return_value = mocker.MagicMock(returncode=0) + + result = auto_lint_file("src/api/main.py") + + assert "✅ AUTO-LINT PASSED" in result + mock_run.assert_called_once() + + # Prove it specifically chose the Python linter + called_command = mock_run.call_args[0][0] + assert "ruff" in called_command + assert "check" in called_command + +def test_auto_lint_file_typescript_failure(mocker): + """Ensure the Forge auto-linter triggers biome for TS files and catches syntax errors.""" + from orchestrator import auto_lint_file + + mock_run = mocker.patch("subprocess.run") + # Simulate a failed biome check (exit code 1) + mock_run.return_value = mocker.MagicMock( + returncode=1, + stdout="Expected an identifier, but found '}'", + stderr="" + ) + + result = auto_lint_file("src/web/components/ui/button.tsx") + + assert "⚠️ AUTO-LINT FAILED" in result + assert "Expected an identifier" in result + mock_run.assert_called_once() + + # Prove it specifically chose the Frontend linter + called_command = mock_run.call_args[0][0] + assert "biome" in called_command + assert "check" in called_command \ No newline at end of file From 08bbdae27487f04d1e7128f3ff5c60cb72cc488f Mon Sep 17 00:00:00 2001 From: Daniel Casper Date: Tue, 5 May 2026 01:13:36 -0500 Subject: [PATCH 2/5] fix lint error --- orchestrator.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/orchestrator.py b/orchestrator.py index 0e7cb6e..d932278 100644 --- a/orchestrator.py +++ b/orchestrator.py @@ -547,7 +547,8 @@ def auto_lint_file(filepath): args[0] = executable try: - result = subprocess.run( + # noqa: S603 tells the linter we explicitly control the args array + result = subprocess.run( # noqa: S603 args, capture_output=True, text=True, @@ -556,7 +557,12 @@ def auto_lint_file(filepath): shell=False ) if result.returncode != 0: - return f"[⚠️ AUTO-LINT FAILED on {filepath}]:\n{result.stdout}\n{result.stderr}\nFix this syntax error before proceeding." + # Wrap the long string in parentheses to comply with the 100-char limit + return ( + f"[⚠️ AUTO-LINT FAILED on {filepath}]:\n" + f"{result.stdout}\n{result.stderr}\n" + "Fix this syntax error before proceeding." + ) return f"[✅ AUTO-LINT PASSED for {filepath}]" except Exception as e: return f"[⚠️ AUTO-LINT EXECUTION ERROR on {filepath}]: {e}" From 98d926e35bc4a5875ae0eb1ff0755d66ad768cac Mon Sep 17 00:00:00 2001 From: Daniel Casper Date: Tue, 5 May 2026 01:15:45 -0500 Subject: [PATCH 3/5] fix another lint error --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 724b5ec..1537c27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,9 @@ ignore = [] [tool.pytest.ini_options] addopts = "-v --strict-markers --cov=orchestrator --cov-report=term-missing --cov-fail-under=40" pythonpath = "." -testpaths = ["tests/api"] +testpaths = ["tests/api", "tests/evals"] markers = [ "core: core system functionality", "integration: testing 3rd party APIs", "eval: AI evaluation tests (makes real LLM API calls, costs money)", -] +] \ No newline at end of file From 54b42c3f8aecdc79966e32a5a52f66592a6eb0a5 Mon Sep 17 00:00:00 2001 From: Daniel Casper Date: Tue, 5 May 2026 01:22:54 -0500 Subject: [PATCH 4/5] fix test errors --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1537c27..0cf6342 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ ignore = [] "tests/**/*.py" = ["S101"] [tool.pytest.ini_options] -addopts = "-v --strict-markers --cov=orchestrator --cov-report=term-missing --cov-fail-under=40" +addopts = "-v -m \"not eval\" --strict-markers --cov=orchestrator --cov-report=term-missing --cov-fail-under=40" pythonpath = "." testpaths = ["tests/api", "tests/evals"] markers = [ From df9a79bc650ddd2433c659ba32a460cd7e772ccb Mon Sep 17 00:00:00 2001 From: Daniel Casper Date: Tue, 5 May 2026 01:32:39 -0500 Subject: [PATCH 5/5] fix last test stuff --- orchestrator.py | 23 +++++++++-------------- tests/evals/test_orchestrator.py | 23 +++++++++++------------ 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/orchestrator.py b/orchestrator.py index d932278..3360c2f 100644 --- a/orchestrator.py +++ b/orchestrator.py @@ -304,11 +304,11 @@ def run_shell_command(command: str) -> str: # noqa: S603 tells the linter we have explicitly sandboxed this input result = subprocess.run( # noqa: S603 - args, - capture_output=True, + args, + capture_output=True, text=True, - encoding="utf-8", - timeout=60, + encoding="utf-8", + timeout=60, shell=False, ) @@ -368,7 +368,7 @@ def get_active_artifacts(): # SHIFT-LEFT: Match any project file path anywhere in the document. # Pattern matches common project paths: docs/, src/, public/, tests/ with typical extensions. paths = re.findall(r"(?:docs|src|public|tests)[a-zA-Z0-9_./-]+\.[a-zA-Z0-9]+", content) - + for path in set(paths): # Deduplicate identical paths if "current_run.md" not in path: artifacts.append(path) @@ -530,7 +530,7 @@ def auto_lint_file(filepath): """Zero-Cost Pre-Audit: Automatically lints files immediately after they are written.""" abs_path = os.path.join(BASE_DIR, filepath) if not os.path.isabs(filepath) else filepath ext = os.path.splitext(abs_path)[1] - + args = [] if ext == ".py": args = ["uv", "run", "ruff", "check", "--no-cache", abs_path] @@ -549,12 +549,7 @@ def auto_lint_file(filepath): try: # noqa: S603 tells the linter we explicitly control the args array result = subprocess.run( # noqa: S603 - args, - capture_output=True, - text=True, - encoding="utf-8", - timeout=30, - shell=False + args, capture_output=True, text=True, encoding="utf-8", timeout=30, shell=False ) if result.returncode != 0: # Wrap the long string in parentheses to comply with the 100-char limit @@ -589,7 +584,7 @@ def execute_autonomous_actions(response_text): if path and content: result = write_file(path, content) execution_logs.append(result) - + # --- SHIFT-LEFT: FORGE AUTO-LINTING --- if "SUCCESS" in result: lint_result = auto_lint_file(path) @@ -604,7 +599,7 @@ def execute_autonomous_actions(response_text): if path and content: result = append_file(path, content) execution_logs.append(result) - + # --- SHIFT-LEFT: FORGE AUTO-LINTING --- if "SUCCESS" in result: lint_result = auto_lint_file(path) diff --git a/tests/evals/test_orchestrator.py b/tests/evals/test_orchestrator.py index c843e49..9b407cf 100644 --- a/tests/evals/test_orchestrator.py +++ b/tests/evals/test_orchestrator.py @@ -211,40 +211,39 @@ def test_log_jsonl_telemetry(tmp_path, monkeypatch) -> None: def test_auto_lint_file_python_success(mocker): """Ensure the Forge auto-linter correctly triggers ruff for Python files and passes.""" from orchestrator import auto_lint_file - + mock_run = mocker.patch("subprocess.run") # Simulate a successful ruff check (exit code 0) mock_run.return_value = mocker.MagicMock(returncode=0) - + result = auto_lint_file("src/api/main.py") - + assert "✅ AUTO-LINT PASSED" in result mock_run.assert_called_once() - + # Prove it specifically chose the Python linter called_command = mock_run.call_args[0][0] assert "ruff" in called_command assert "check" in called_command + def test_auto_lint_file_typescript_failure(mocker): """Ensure the Forge auto-linter triggers biome for TS files and catches syntax errors.""" from orchestrator import auto_lint_file - + mock_run = mocker.patch("subprocess.run") # Simulate a failed biome check (exit code 1) mock_run.return_value = mocker.MagicMock( - returncode=1, - stdout="Expected an identifier, but found '}'", - stderr="" + returncode=1, stdout="Expected an identifier, but found '}'", stderr="" ) - + result = auto_lint_file("src/web/components/ui/button.tsx") - + assert "⚠️ AUTO-LINT FAILED" in result assert "Expected an identifier" in result mock_run.assert_called_once() - + # Prove it specifically chose the Frontend linter called_command = mock_run.call_args[0][0] assert "biome" in called_command - assert "check" in called_command \ No newline at end of file + assert "check" in called_command