Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,6 @@ jobs:

- name: Run pytest (v2 suite) with coverage
timeout-minutes: 15
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
uv run pytest tests/ \
--ignore=tests/e2e \
Expand Down
8 changes: 8 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ cf tasks show <id>
# Work — single task
cf work start <task-id> [--execute] [--engine react|plan] [--verbose] [--dry-run]
cf work start <task-id> --execute --stall-timeout 120 --stall-action retry|blocker|fail
cf work start <task-id> --execute --llm-provider openai --llm-model gpt-4o
cf work stop <task-id>
cf work resume <task-id>
cf work follow <task-id> [--tail 50]
Expand All @@ -166,6 +167,7 @@ cf work diagnose <task-id>
# Work — batch
cf work batch run [<id>...] [--all-ready] [--engine react|plan]
cf work batch run --strategy serial|parallel|auto [--max-parallel 4] [--retry 3]
cf work batch run --all-ready --llm-provider openai --llm-model qwen2.5-coder:7b
cf work batch status|cancel|resume [batch_id]

# Blockers
Expand Down Expand Up @@ -241,10 +243,16 @@ E2B_API_KEY=e2b_... # Required for --engine cloud
DATABASE_PATH=./codeframe.db # Optional

# LLM Provider selection (multi-provider support)
# Priority: CLI flag > env var > .codeframe/config.yaml > default (anthropic)
CODEFRAME_LLM_PROVIDER=anthropic # Provider: anthropic (default), openai, ollama, vllm, compatible
CODEFRAME_LLM_MODEL=gpt-4o # Model override (used with openai/ollama/vllm/compatible)
OPENAI_API_KEY=sk-... # Required for openai provider; not needed for local providers
OPENAI_BASE_URL=http://localhost:11434/v1 # Base URL override (for ollama, vllm, or custom endpoints)
# Per-workspace config: .codeframe/config.yaml supports llm: block
# llm:
# provider: openai
# model: qwen2.5-coder:7b
# base_url: http://localhost:11434/v1 # optional, for local models

# Optional — Rate limiting
RATE_LIMIT_ENABLED=true
Expand Down
52 changes: 52 additions & 0 deletions codeframe/adapters/llm/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
"or configure via 'codeframe auth setup --provider anthropic'."
)
self._client = None
self._async_client = None

@property
def client(self):
Expand Down Expand Up @@ -120,6 +121,57 @@ def complete(
# Parse response
return self._parse_response(response)

async def async_complete(
    self,
    messages: list[dict],
    purpose: Purpose = Purpose.EXECUTION,
    tools: Optional[list[Tool]] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    system: Optional[str] = None,
) -> LLMResponse:
    """Non-blocking completion via the AsyncAnthropic SDK client.

    Args:
        messages: Conversation messages in provider-agnostic dict form.
        purpose: Purpose of the call, used for model selection.
        tools: Optional tool definitions the model may invoke.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature; values <= 0 are omitted so the
            provider default applies. NOTE(review): an explicit
            temperature=0.0 is silently dropped here — confirm this matches
            the sync path's intent.
        system: Optional system prompt.

    Returns:
        LLMResponse with content and/or tool calls.

    Raises:
        LLMAuthError: Credentials rejected by the API.
        LLMRateLimitError: Rate limit exceeded.
        LLMConnectionError: Network-level failure reaching the API.
    """
    from anthropic import (
        APIConnectionError,
        AsyncAnthropic,
        AuthenticationError,
        RateLimitError,
    )
    from codeframe.adapters.llm.base import (
        LLMAuthError,
        LLMConnectionError,
        LLMRateLimitError,
    )

    # Lazily create and cache the async client on first use.
    if self._async_client is None:
        self._async_client = AsyncAnthropic(api_key=self.api_key)

    request: dict = {
        "model": self.get_model(purpose),
        "max_tokens": max_tokens,
        "messages": self._convert_messages(messages),
    }
    if temperature > 0:
        request["temperature"] = temperature
    if system:
        request["system"] = system
    if tools:
        request["tools"] = self._convert_tools(tools)

    # Normalize SDK errors to the adapter's common exception hierarchy.
    try:
        raw = await self._async_client.messages.create(**request)
    except AuthenticationError as exc:
        raise LLMAuthError(str(exc)) from exc
    except RateLimitError as exc:
        raise LLMRateLimitError(str(exc)) from exc
    except APIConnectionError as exc:
        raise LLMConnectionError(str(exc)) from exc
    return self._parse_response(raw)

Comment on lines +124 to +174
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's check the current state of the complete() method in anthropic.py
cd codeframe/adapters/llm && wc -l anthropic.py

Repository: frankbria/codeframe

Length of output: 79


🏁 Script executed:

# Get the full anthropic.py to see both complete() and async_complete() methods
cat -n codeframe/adapters/llm/anthropic.py | head -150

Repository: frankbria/codeframe

Length of output: 5551


🏁 Script executed:

# Search for the complete() method to see if it has exception mapping
rg "def complete" codeframe/adapters/llm/anthropic.py -A 30

Repository: frankbria/codeframe

Length of output: 1003


🏁 Script executed:

# Check the base adapter interface to understand the contract
cat codeframe/adapters/llm/base.py

Repository: frankbria/codeframe

Length of output: 10579


🏁 Script executed:

# Search for SupervisorResolver and _classify_with_supervision
fd -t f "*.py" | xargs rg "SupervisorResolver" --files-with-matches

Repository: frankbria/codeframe

Length of output: 459


🏁 Script executed:

# Look at the SupervisorResolver implementation
rg "class SupervisorResolver" -A 50

Repository: frankbria/codeframe

Length of output: 3215


🏁 Script executed:

# Search for _classify_with_supervision in conductor.py
rg "_classify_with_supervision" codeframe/core/conductor.py -A 20

Repository: frankbria/codeframe

Length of output: 1830


🏁 Script executed:

# Get more context around SupervisorResolver methods
rg "def " codeframe/core/conductor.py | grep -A 5 -B 5 "SupervisorResolver"

Repository: frankbria/codeframe

Length of output: 664


🏁 Script executed:

# Check what exceptions the base interface expects complete() to handle or raise
rg "def complete" codeframe/adapters/llm/base.py -A 15

Repository: frankbria/codeframe

Length of output: 599


🏁 Script executed:

# Get the full _classify_with_supervision method to see exception handling
rg "_classify_with_supervision" codeframe/core/conductor.py -A 40

Repository: frankbria/codeframe

Length of output: 3896


Apply the same exception mapping to complete().

The sync complete() method (lines 77–121) does not normalize Anthropic SDK exceptions to the base adapter interface, while async_complete() implements full exception mapping (AuthenticationError → LLMAuthError, RateLimitError → LLMRateLimitError, APIConnectionError → LLMConnectionError).

This creates divergent error contracts between sync and async paths. SupervisorResolver._classify_with_supervision() calls complete() and catches all exceptions broadly, falling back to a heuristic—behavior that masks authentication and rate-limit failures, which should fail fast rather than silently degrade. Per coding guidelines, adapters must implement the base interface uniformly for integration consistency.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@codeframe/adapters/llm/anthropic.py` around lines 123 - 173, The sync method
complete() currently fails to map Anthropic SDK exceptions to the adapter's base
exceptions (AuthenticationError → LLMAuthError, RateLimitError →
LLMRateLimitError, APIConnectionError → LLMConnectionError) unlike
async_complete(), causing inconsistent contracts; update complete() to import
the same SDK exceptions (AuthenticationError, RateLimitError,
APIConnectionError) and wrap its call to self._client.messages.create(...) in a
try/except that catches those three exceptions and re-raises them as
LLMAuthError, LLMRateLimitError, and LLMConnectionError respectively (preserve
original message and use "from exc"), mirroring the async_complete() behavior
and keeping function name complete() and parsing logic (_parse_response)
unchanged.

def stream(
self,
messages: list[dict],
Expand Down
55 changes: 55 additions & 0 deletions codeframe/adapters/llm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,35 @@
along with shared data structures for requests and responses.
"""

import asyncio
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Iterator, Optional


# ---------------------------------------------------------------------------
# Common exception hierarchy
# ---------------------------------------------------------------------------


class LLMError(Exception):
    """Root of the LLM adapter exception hierarchy.

    Catch this to handle any provider failure uniformly, regardless of
    which concrete adapter (anthropic, openai, ...) raised it.
    """


class LLMAuthError(LLMError):
    """Raised when the provider rejects credentials (invalid or expired key/token)."""


class LLMRateLimitError(LLMError):
    """Raised when the provider's rate limit is hit; callers may retry after a backoff."""


class LLMConnectionError(LLMError):
    """Raised on a network-level failure while reaching the provider."""


class Purpose(str, Enum):
"""Purpose of an LLM call, used for model selection."""

Expand Down Expand Up @@ -277,6 +299,39 @@ def stream(
)
yield response.content

async def async_complete(
    self,
    messages: list[dict],
    purpose: Purpose = Purpose.EXECUTION,
    tools: Optional[list["Tool"]] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    system: Optional[str] = None,
) -> "LLMResponse":
    """Async completion.

    Default implementation offloads the synchronous :meth:`complete` to the
    default thread-pool executor so it never blocks the event loop.
    Subclasses should override this with a truly async implementation when
    the underlying SDK supports it.

    Args:
        messages: Conversation messages
        purpose: Purpose of call (for model selection)
        tools: Available tools for the model to use
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature
        system: System prompt

    Returns:
        LLMResponse with content and/or tool calls
    """

    def _sync_call() -> "LLMResponse":
        # Runs on a worker thread; closes over all call arguments.
        return self.complete(messages, purpose, tools, max_tokens, temperature, system)

    return await asyncio.get_running_loop().run_in_executor(None, _sync_call)

def get_model(self, purpose: Purpose) -> str:
"""Get the model for a given purpose.

Expand Down
50 changes: 50 additions & 0 deletions codeframe/adapters/llm/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
)

self._client = None
self._async_client = None

def get_model(self, purpose: Purpose) -> str:
"""Return the model for a given purpose.
Expand Down Expand Up @@ -146,6 +147,55 @@ def complete(

return self._parse_response(response)

async def async_complete(
    self,
    messages: list[dict],
    purpose: Purpose = Purpose.EXECUTION,
    tools: Optional[list[Tool]] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    system: Optional[str] = None,
) -> LLMResponse:
    """Non-blocking completion via openai.AsyncOpenAI.

    Args:
        messages: Conversation messages in provider-agnostic dict form.
        purpose: Purpose of the call, used for model selection.
        tools: Optional tool definitions; when present, tool_choice is
            set to "auto" so the model decides whether to call them.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature (always forwarded, including 0.0).
        system: Optional system prompt, prepended as a system-role message.

    Returns:
        LLMResponse with content and/or tool calls.

    Raises:
        LLMAuthError: Credentials rejected by the API.
        LLMRateLimitError: Rate limit exceeded.
        LLMConnectionError: Network-level failure reaching the API.
    """
    import openai as _openai
    from codeframe.adapters.llm.base import (
        LLMAuthError,
        LLMConnectionError,
        LLMRateLimitError,
    )

    # Lazily create and cache the async client; base_url supports local
    # OpenAI-compatible endpoints (ollama, vllm, ...).
    if self._async_client is None:
        self._async_client = _openai.AsyncOpenAI(
            api_key=self.api_key, base_url=self.base_url
        )

    # OpenAI has no dedicated system parameter: prepend a system message.
    prefix = [{"role": "system", "content": system}] if system else []
    request: dict = {
        "model": self.get_model(purpose),
        "max_tokens": max_tokens,
        "messages": prefix + self._convert_messages(messages),
        "temperature": temperature,
    }
    if tools:
        request["tools"] = self._convert_tools(tools)
        request["tool_choice"] = "auto"

    # Normalize SDK errors to the adapter's common exception hierarchy.
    try:
        raw = await self._async_client.chat.completions.create(**request)
    except _openai.AuthenticationError as exc:
        raise LLMAuthError(str(exc)) from exc
    except _openai.RateLimitError as exc:
        raise LLMRateLimitError(str(exc)) from exc
    except _openai.APIConnectionError as exc:
        raise LLMConnectionError(str(exc)) from exc
    return self._parse_response(raw)

Comment on lines +150 to +198
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's examine the complete() method in openai.py
rg -A 50 "def complete\(" codeframe/adapters/llm/openai.py

Repository: frankbria/codeframe

Length of output: 1820


🏁 Script executed:

# Also check the base adapter interface to understand the contract
cat -n codeframe/adapters/llm/base.py | head -100

Repository: frankbria/codeframe

Length of output: 4170


🏁 Script executed:

# Let's get the full structure of the openai.py file to understand the class
wc -l codeframe/adapters/llm/openai.py

Repository: frankbria/codeframe

Length of output: 99


🏁 Script executed:

# Let's check the base class structure to see if there are abstract method definitions
rg -B 5 -A 20 "def complete|def async_complete" codeframe/adapters/llm/base.py

Repository: frankbria/codeframe

Length of output: 1840


🏁 Script executed:

# Let's also verify the OpenAIProvider class definition to see if it implements the interface
rg -B 2 "class OpenAIProvider" codeframe/adapters/llm/openai.py

Repository: frankbria/codeframe

Length of output: 99


🏁 Script executed:

# Let's verify what the LLMProvider base class looks like to confirm the contract
rg -B 5 -A 30 "class LLMProvider" codeframe/adapters/llm/base.py

Repository: frankbria/codeframe

Length of output: 1109


🏁 Script executed:

# Let's check if there are other adapter implementations to see how they handle exceptions
fd "\.py$" codeframe/adapters/llm/ | grep -v base.py | grep -v openai.py

Repository: frankbria/codeframe

Length of output: 164


🏁 Script executed:

# Check the Anthropic adapter's exception handling for both sync and async
rg -B 5 -A 30 "def complete\(|def async_complete\(" codeframe/adapters/llm/anthropic.py | head -150

Repository: frankbria/codeframe

Length of output: 2277


🏁 Script executed:

# Check the mock adapter as well to understand the pattern
rg -B 5 -A 30 "def complete\(|def async_complete\(" codeframe/adapters/llm/mock.py

Repository: frankbria/codeframe

Length of output: 1209


🏁 Script executed:

# Let's also check if there are any callers of the sync complete() method to understand the impact
rg "\.complete\(" codeframe/ --type py | grep -v "async_complete" | head -20

Repository: frankbria/codeframe

Length of output: 1498


Sync and async paths have divergent exception contracts.

async_complete() raises LLMAuthError, LLMRateLimitError, and LLMConnectionError, but complete() catches the same OpenAI SDK exceptions and converts them to ValueError. Callers using the sync path will handle different exception types than those using the async path, violating the uniform adapter interface requirement stated in the coding guidelines: "Adapters (LLM providers in codeframe/adapters/llm/) must implement the base adapter interface for uniform integration."

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@codeframe/adapters/llm/openai.py` around lines 149 - 197, The sync path
(complete()) currently maps OpenAI SDK exceptions to ValueError while
async_complete() maps them to LLMAuthError / LLMRateLimitError /
LLMConnectionError, causing divergent contracts; update complete() to import and
raise the same adapter exceptions (LLMAuthError, LLMRateLimitError,
LLMConnectionError) when catching _openai.AuthenticationError,
_openai.RateLimitError, and _openai.APIConnectionError respectively so both
complete() and async_complete() present the same exception types to callers
(refer to the methods complete and async_complete and the exception classes
LLMAuthError, LLMRateLimitError, LLMConnectionError).

def stream(
self,
messages: list[dict],
Expand Down
19 changes: 7 additions & 12 deletions codeframe/agents/frontend_worker_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@
following project conventions (Tailwind CSS, functional components).
"""

import os
import json
import logging
import asyncio
from pathlib import Path
from typing import Dict, Any, Optional
from anthropic import AsyncAnthropic

from codeframe.adapters.llm.base import Purpose
from codeframe.core.models import Task, AgentMaturity
from codeframe.agents.worker_agent import WorkerAgent

Expand Down Expand Up @@ -59,8 +58,8 @@ def __init__(
system_prompt=self._build_system_prompt(),
db=db,
)
self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
self.client = AsyncAnthropic(api_key=self.api_key) if self.api_key else None
# api_key kept for backwards compatibility; LLM calls use self.llm_provider
self.api_key = api_key
self.websocket_manager = websocket_manager
self.project_root = Path(__file__).parent.parent.parent # codeframe/
self.web_ui_root = self.project_root / "web-ui"
Expand Down Expand Up @@ -293,10 +292,6 @@ async def _generate_react_component(self, spec: Dict[str, Any]) -> str:
Returns:
Component code as string
"""
if not self.client:
# Fallback: generate basic component template
return self._generate_basic_component_template(spec)

prompt = f"""Generate a React functional component with the following specification:

Component Name: {spec['name']}
Expand All @@ -312,14 +307,14 @@ async def _generate_react_component(self, spec: Dict[str, Any]) -> str:
Provide ONLY the component code, no explanations."""

try:
response = await self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2000,
response = await self.llm_provider.async_complete(
messages=[{"role": "user", "content": prompt}],
purpose=Purpose.GENERATION,
max_tokens=2000,
)

# Extract code from response
code = response.content[0].text
code = response.content

# Remove markdown code blocks if present
if "```" in code:
Expand Down
29 changes: 11 additions & 18 deletions codeframe/agents/test_worker_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
analyzing code for test requirements, and self-correcting failing tests.
"""

import os
import sys
import json
import logging
Expand All @@ -14,8 +13,8 @@
import re
from pathlib import Path
from typing import Dict, Any, Optional, Tuple
from anthropic import AsyncAnthropic

from codeframe.adapters.llm.base import Purpose
from codeframe.core.models import Task, AgentMaturity
from codeframe.agents.worker_agent import WorkerAgent

Expand Down Expand Up @@ -67,8 +66,8 @@ def __init__(
system_prompt=self._build_system_prompt(),
db=db,
)
self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
self.client = AsyncAnthropic(api_key=self.api_key) if self.api_key else None
# api_key kept for backwards compatibility; LLM calls use self.llm_provider
self.api_key = api_key
self.websocket_manager = websocket_manager
self.max_correction_attempts = max_correction_attempts
self.project_root = Path(__file__).parent.parent.parent
Expand Down Expand Up @@ -321,9 +320,6 @@ async def _generate_pytest_tests(
Returns:
Generated test code
"""
if not self.client:
return self._generate_basic_test_template(spec, code_analysis)

# Build context from code analysis
context = ""
if code_analysis.get("functions"):
Expand Down Expand Up @@ -351,13 +347,13 @@ async def _generate_pytest_tests(
Provide ONLY the test code, no explanations."""

try:
response = await self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
response = await self.llm_provider.async_complete(
messages=[{"role": "user", "content": prompt}],
purpose=Purpose.GENERATION,
max_tokens=3000,
)

code = response.content[0].text
code = response.content

# Remove markdown code blocks
if "```" in code:
Expand Down Expand Up @@ -671,9 +667,6 @@ async def _correct_failing_tests(
Returns:
Corrected test code or None
"""
if not self.client:
return None

prompt = f"""Fix the following failing pytest tests:

Original Test Code:
Expand All @@ -696,13 +689,13 @@ async def _correct_failing_tests(
Provide ONLY the corrected test code, no explanations."""

try:
response = await self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
response = await self.llm_provider.async_complete(
messages=[{"role": "user", "content": prompt}],
purpose=Purpose.CORRECTION,
max_tokens=3000,
)

code = response.content[0].text
code = response.content

# Remove markdown code blocks
if "```" in code:
Expand Down
Loading
Loading