Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,6 @@ jobs:

- name: Run pytest (v2 suite) with coverage
timeout-minutes: 15
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
uv run pytest tests/ \
--ignore=tests/e2e \
Expand Down
8 changes: 8 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ cf tasks show <id>
# Work — single task
cf work start <task-id> [--execute] [--engine react|plan] [--verbose] [--dry-run]
cf work start <task-id> --execute --stall-timeout 120 --stall-action retry|blocker|fail
cf work start <task-id> --execute --llm-provider openai --llm-model gpt-4o
cf work stop <task-id>
cf work resume <task-id>
cf work follow <task-id> [--tail 50]
Expand All @@ -166,6 +167,7 @@ cf work diagnose <task-id>
# Work — batch
cf work batch run [<id>...] [--all-ready] [--engine react|plan]
cf work batch run --strategy serial|parallel|auto [--max-parallel 4] [--retry 3]
cf work batch run --all-ready --llm-provider openai --llm-model qwen2.5-coder:7b
cf work batch status|cancel|resume [batch_id]

# Blockers
Expand Down Expand Up @@ -241,10 +243,16 @@ E2B_API_KEY=e2b_... # Required for --engine cloud
DATABASE_PATH=./codeframe.db # Optional

# LLM Provider selection (multi-provider support)
# Priority: CLI flag > env var > .codeframe/config.yaml > default (anthropic)
CODEFRAME_LLM_PROVIDER=anthropic # Provider: anthropic (default), openai, ollama, vllm, compatible
CODEFRAME_LLM_MODEL=gpt-4o # Model override (used with openai/ollama/vllm/compatible)
OPENAI_API_KEY=sk-... # Required for openai provider; not needed for local providers
OPENAI_BASE_URL=http://localhost:11434/v1 # Base URL override (for ollama, vllm, or custom endpoints)
# Per-workspace config: .codeframe/config.yaml supports llm: block
# llm:
# provider: openai
# model: qwen2.5-coder:7b
# base_url: http://localhost:11434/v1 # optional, for local models

# Optional — Rate limiting
RATE_LIMIT_ENABLED=true
Expand Down
52 changes: 52 additions & 0 deletions codeframe/adapters/llm/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
"or configure via 'codeframe auth setup --provider anthropic'."
)
self._client = None
self._async_client = None

@property
def client(self):
Expand Down Expand Up @@ -120,6 +121,57 @@ def complete(
# Parse response
return self._parse_response(response)

async def async_complete(
    self,
    messages: list[dict],
    purpose: Purpose = Purpose.EXECUTION,
    tools: Optional[list[Tool]] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    system: Optional[str] = None,
) -> LLMResponse:
    """Non-blocking completion via the AsyncAnthropic SDK client.

    Args:
        messages: Conversation messages in provider-agnostic dict form.
        purpose: Purpose of the call, used for model selection.
        tools: Optional tool definitions the model may invoke.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature; values <= 0 are omitted so the
            provider default applies. NOTE(review): an explicit
            temperature=0.0 is silently dropped here — confirm this matches
            the sync path's intent.
        system: Optional system prompt.

    Returns:
        LLMResponse with content and/or tool calls.

    Raises:
        LLMAuthError: Credentials rejected by the API.
        LLMRateLimitError: Rate limit exceeded.
        LLMConnectionError: Network-level failure reaching the API.
    """
    from anthropic import (
        APIConnectionError,
        AsyncAnthropic,
        AuthenticationError,
        RateLimitError,
    )
    from codeframe.adapters.llm.base import (
        LLMAuthError,
        LLMConnectionError,
        LLMRateLimitError,
    )

    # Lazily create and cache the async client on first use.
    if self._async_client is None:
        self._async_client = AsyncAnthropic(api_key=self.api_key)

    request: dict = {
        "model": self.get_model(purpose),
        "max_tokens": max_tokens,
        "messages": self._convert_messages(messages),
    }
    if temperature > 0:
        request["temperature"] = temperature
    if system:
        request["system"] = system
    if tools:
        request["tools"] = self._convert_tools(tools)

    # Normalize SDK errors to the adapter's common exception hierarchy.
    try:
        raw = await self._async_client.messages.create(**request)
    except AuthenticationError as exc:
        raise LLMAuthError(str(exc)) from exc
    except RateLimitError as exc:
        raise LLMRateLimitError(str(exc)) from exc
    except APIConnectionError as exc:
        raise LLMConnectionError(str(exc)) from exc
    return self._parse_response(raw)

Comment on lines +124 to +174
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's check the current state of the complete() method in anthropic.py
cd codeframe/adapters/llm && wc -l anthropic.py

Repository: frankbria/codeframe

Length of output: 79


🏁 Script executed:

# Get the full anthropic.py to see both complete() and async_complete() methods
cat -n codeframe/adapters/llm/anthropic.py | head -150

Repository: frankbria/codeframe

Length of output: 5551


🏁 Script executed:

# Search for the complete() method to see if it has exception mapping
rg "def complete" codeframe/adapters/llm/anthropic.py -A 30

Repository: frankbria/codeframe

Length of output: 1003


🏁 Script executed:

# Check the base adapter interface to understand the contract
cat codeframe/adapters/llm/base.py

Repository: frankbria/codeframe

Length of output: 10579


🏁 Script executed:

# Search for SupervisorResolver and _classify_with_supervision
fd -t f "*.py" | xargs rg "SupervisorResolver" --files-with-matches

Repository: frankbria/codeframe

Length of output: 459


🏁 Script executed:

# Look at the SupervisorResolver implementation
rg "class SupervisorResolver" -A 50

Repository: frankbria/codeframe

Length of output: 3215


🏁 Script executed:

# Search for _classify_with_supervision in conductor.py
rg "_classify_with_supervision" codeframe/core/conductor.py -A 20

Repository: frankbria/codeframe

Length of output: 1830


🏁 Script executed:

# Get more context around SupervisorResolver methods
rg "def " codeframe/core/conductor.py | grep -A 5 -B 5 "SupervisorResolver"

Repository: frankbria/codeframe

Length of output: 664


🏁 Script executed:

# Check what exceptions the base interface expects complete() to handle or raise
rg "def complete" codeframe/adapters/llm/base.py -A 15

Repository: frankbria/codeframe

Length of output: 599


🏁 Script executed:

# Get the full _classify_with_supervision method to see exception handling
rg "_classify_with_supervision" codeframe/core/conductor.py -A 40

Repository: frankbria/codeframe

Length of output: 3896


Apply the same exception mapping to complete().

The sync complete() method (lines 77–121) does not normalize Anthropic SDK exceptions to the base adapter interface, while async_complete() implements full exception mapping (AuthenticationError → LLMAuthError, RateLimitError → LLMRateLimitError, APIConnectionError → LLMConnectionError).

This creates divergent error contracts between sync and async paths. SupervisorResolver._classify_with_supervision() calls complete() and catches all exceptions broadly, falling back to a heuristic—behavior that masks authentication and rate-limit failures, which should fail fast rather than silently degrade. Per coding guidelines, adapters must implement the base interface uniformly for integration consistency.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@codeframe/adapters/llm/anthropic.py` around lines 123 - 173, The sync method
complete() currently fails to map Anthropic SDK exceptions to the adapter's base
exceptions (AuthenticationError → LLMAuthError, RateLimitError →
LLMRateLimitError, APIConnectionError → LLMConnectionError) unlike
async_complete(), causing inconsistent contracts; update complete() to import
the same SDK exceptions (AuthenticationError, RateLimitError,
APIConnectionError) and wrap its call to self._client.messages.create(...) in a
try/except that catches those three exceptions and re-raises them as
LLMAuthError, LLMRateLimitError, and LLMConnectionError respectively (preserve
original message and use "from exc"), mirroring the async_complete() behavior
and keeping function name complete() and parsing logic (_parse_response)
unchanged.

def stream(
self,
messages: list[dict],
Expand Down
55 changes: 55 additions & 0 deletions codeframe/adapters/llm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,35 @@
along with shared data structures for requests and responses.
"""

import asyncio
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Iterator, Optional


# ---------------------------------------------------------------------------
# Common exception hierarchy
# ---------------------------------------------------------------------------


class LLMError(Exception):
    """Root of the LLM adapter exception hierarchy.

    Catch this to handle any provider failure uniformly, regardless of
    which concrete adapter (anthropic, openai, ...) raised it.
    """


class LLMAuthError(LLMError):
    """Raised when the provider rejects credentials (invalid or expired key/token)."""


class LLMRateLimitError(LLMError):
    """Raised when the provider's rate limit is hit; callers may retry after a backoff."""


class LLMConnectionError(LLMError):
    """Raised on a network-level failure while reaching the provider."""


class Purpose(str, Enum):
"""Purpose of an LLM call, used for model selection."""

Expand Down Expand Up @@ -277,6 +299,39 @@ def stream(
)
yield response.content

async def async_complete(
    self,
    messages: list[dict],
    purpose: Purpose = Purpose.EXECUTION,
    tools: Optional[list["Tool"]] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    system: Optional[str] = None,
) -> "LLMResponse":
    """Async completion.

    Default implementation offloads the synchronous :meth:`complete` to the
    default thread-pool executor so it never blocks the event loop.
    Subclasses should override this with a truly async implementation when
    the underlying SDK supports it.

    Args:
        messages: Conversation messages
        purpose: Purpose of call (for model selection)
        tools: Available tools for the model to use
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature
        system: System prompt

    Returns:
        LLMResponse with content and/or tool calls
    """

    def _sync_call() -> "LLMResponse":
        # Runs on a worker thread; closes over all call arguments.
        return self.complete(messages, purpose, tools, max_tokens, temperature, system)

    return await asyncio.get_running_loop().run_in_executor(None, _sync_call)

def get_model(self, purpose: Purpose) -> str:
"""Get the model for a given purpose.

Expand Down
50 changes: 50 additions & 0 deletions codeframe/adapters/llm/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
)

self._client = None
self._async_client = None

def get_model(self, purpose: Purpose) -> str:
"""Return the model for a given purpose.
Expand Down Expand Up @@ -146,6 +147,55 @@ def complete(

return self._parse_response(response)

async def async_complete(
    self,
    messages: list[dict],
    purpose: Purpose = Purpose.EXECUTION,
    tools: Optional[list[Tool]] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    system: Optional[str] = None,
) -> LLMResponse:
    """Non-blocking completion via openai.AsyncOpenAI.

    Args:
        messages: Conversation messages in provider-agnostic dict form.
        purpose: Purpose of the call, used for model selection.
        tools: Optional tool definitions; when present, tool_choice is
            set to "auto" so the model decides whether to call them.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature (always forwarded, including 0.0).
        system: Optional system prompt, prepended as a system-role message.

    Returns:
        LLMResponse with content and/or tool calls.

    Raises:
        LLMAuthError: Credentials rejected by the API.
        LLMRateLimitError: Rate limit exceeded.
        LLMConnectionError: Network-level failure reaching the API.
    """
    import openai as _openai
    from codeframe.adapters.llm.base import (
        LLMAuthError,
        LLMConnectionError,
        LLMRateLimitError,
    )

    # Lazily create and cache the async client; base_url supports local
    # OpenAI-compatible endpoints (ollama, vllm, ...).
    if self._async_client is None:
        self._async_client = _openai.AsyncOpenAI(
            api_key=self.api_key, base_url=self.base_url
        )

    # OpenAI has no dedicated system parameter: prepend a system message.
    prefix = [{"role": "system", "content": system}] if system else []
    request: dict = {
        "model": self.get_model(purpose),
        "max_tokens": max_tokens,
        "messages": prefix + self._convert_messages(messages),
        "temperature": temperature,
    }
    if tools:
        request["tools"] = self._convert_tools(tools)
        request["tool_choice"] = "auto"

    # Normalize SDK errors to the adapter's common exception hierarchy.
    try:
        raw = await self._async_client.chat.completions.create(**request)
    except _openai.AuthenticationError as exc:
        raise LLMAuthError(str(exc)) from exc
    except _openai.RateLimitError as exc:
        raise LLMRateLimitError(str(exc)) from exc
    except _openai.APIConnectionError as exc:
        raise LLMConnectionError(str(exc)) from exc
    return self._parse_response(raw)

Comment on lines +150 to +198
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's examine the complete() method in openai.py
rg -A 50 "def complete\(" codeframe/adapters/llm/openai.py

Repository: frankbria/codeframe

Length of output: 1820


🏁 Script executed:

# Also check the base adapter interface to understand the contract
cat -n codeframe/adapters/llm/base.py | head -100

Repository: frankbria/codeframe

Length of output: 4170


🏁 Script executed:

# Let's get the full structure of the openai.py file to understand the class
wc -l codeframe/adapters/llm/openai.py

Repository: frankbria/codeframe

Length of output: 99


🏁 Script executed:

# Let's check the base class structure to see if there are abstract method definitions
rg -B 5 -A 20 "def complete|def async_complete" codeframe/adapters/llm/base.py

Repository: frankbria/codeframe

Length of output: 1840


🏁 Script executed:

# Let's also verify the OpenAIProvider class definition to see if it implements the interface
rg -B 2 "class OpenAIProvider" codeframe/adapters/llm/openai.py

Repository: frankbria/codeframe

Length of output: 99


🏁 Script executed:

# Let's verify what the LLMProvider base class looks like to confirm the contract
rg -B 5 -A 30 "class LLMProvider" codeframe/adapters/llm/base.py

Repository: frankbria/codeframe

Length of output: 1109


🏁 Script executed:

# Let's check if there are other adapter implementations to see how they handle exceptions
fd "\.py$" codeframe/adapters/llm/ | grep -v base.py | grep -v openai.py

Repository: frankbria/codeframe

Length of output: 164


🏁 Script executed:

# Check the Anthropic adapter's exception handling for both sync and async
rg -B 5 -A 30 "def complete\(|def async_complete\(" codeframe/adapters/llm/anthropic.py | head -150

Repository: frankbria/codeframe

Length of output: 2277


🏁 Script executed:

# Check the mock adapter as well to understand the pattern
rg -B 5 -A 30 "def complete\(|def async_complete\(" codeframe/adapters/llm/mock.py

Repository: frankbria/codeframe

Length of output: 1209


🏁 Script executed:

# Let's also check if there are any callers of the sync complete() method to understand the impact
rg "\.complete\(" codeframe/ --type py | grep -v "async_complete" | head -20

Repository: frankbria/codeframe

Length of output: 1498


Sync and async paths have divergent exception contracts.

async_complete() raises LLMAuthError, LLMRateLimitError, and LLMConnectionError, but complete() catches the same OpenAI SDK exceptions and converts them to ValueError. Callers using the sync path will handle different exception types than those using the async path, violating the uniform adapter interface requirement stated in the coding guidelines: "Adapters (LLM providers in codeframe/adapters/llm/) must implement the base adapter interface for uniform integration."

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@codeframe/adapters/llm/openai.py` around lines 149 - 197, The sync path
(complete()) currently maps OpenAI SDK exceptions to ValueError while
async_complete() maps them to LLMAuthError / LLMRateLimitError /
LLMConnectionError, causing divergent contracts; update complete() to import and
raise the same adapter exceptions (LLMAuthError, LLMRateLimitError,
LLMConnectionError) when catching _openai.AuthenticationError,
_openai.RateLimitError, and _openai.APIConnectionError respectively so both
complete() and async_complete() present the same exception types to callers
(refer to the methods complete and async_complete and the exception classes
LLMAuthError, LLMRateLimitError, LLMConnectionError).

def stream(
self,
messages: list[dict],
Expand Down
19 changes: 7 additions & 12 deletions codeframe/agents/frontend_worker_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@
following project conventions (Tailwind CSS, functional components).
"""

import os
import json
import logging
import asyncio
from pathlib import Path
from typing import Dict, Any, Optional
from anthropic import AsyncAnthropic

from codeframe.adapters.llm.base import Purpose
from codeframe.core.models import Task, AgentMaturity
from codeframe.agents.worker_agent import WorkerAgent

Expand Down Expand Up @@ -59,8 +58,8 @@ def __init__(
system_prompt=self._build_system_prompt(),
db=db,
)
self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
self.client = AsyncAnthropic(api_key=self.api_key) if self.api_key else None
# api_key kept for backwards compatibility; LLM calls use self.llm_provider
self.api_key = api_key
self.websocket_manager = websocket_manager
self.project_root = Path(__file__).parent.parent.parent # codeframe/
self.web_ui_root = self.project_root / "web-ui"
Expand Down Expand Up @@ -293,10 +292,6 @@ async def _generate_react_component(self, spec: Dict[str, Any]) -> str:
Returns:
Component code as string
"""
if not self.client:
# Fallback: generate basic component template
return self._generate_basic_component_template(spec)

prompt = f"""Generate a React functional component with the following specification:

Component Name: {spec['name']}
Expand All @@ -312,14 +307,14 @@ async def _generate_react_component(self, spec: Dict[str, Any]) -> str:
Provide ONLY the component code, no explanations."""

try:
response = await self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2000,
response = await self.llm_provider.async_complete(
messages=[{"role": "user", "content": prompt}],
purpose=Purpose.GENERATION,
max_tokens=2000,
)

# Extract code from response
code = response.content[0].text
code = response.content

# Remove markdown code blocks if present
if "```" in code:
Expand Down
29 changes: 11 additions & 18 deletions codeframe/agents/test_worker_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
analyzing code for test requirements, and self-correcting failing tests.
"""

import os
import sys
import json
import logging
Expand All @@ -14,8 +13,8 @@
import re
from pathlib import Path
from typing import Dict, Any, Optional, Tuple
from anthropic import AsyncAnthropic

from codeframe.adapters.llm.base import Purpose
from codeframe.core.models import Task, AgentMaturity
from codeframe.agents.worker_agent import WorkerAgent

Expand Down Expand Up @@ -67,8 +66,8 @@ def __init__(
system_prompt=self._build_system_prompt(),
db=db,
)
self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
self.client = AsyncAnthropic(api_key=self.api_key) if self.api_key else None
# api_key kept for backwards compatibility; LLM calls use self.llm_provider
self.api_key = api_key
self.websocket_manager = websocket_manager
self.max_correction_attempts = max_correction_attempts
self.project_root = Path(__file__).parent.parent.parent
Expand Down Expand Up @@ -321,9 +320,6 @@ async def _generate_pytest_tests(
Returns:
Generated test code
"""
if not self.client:
return self._generate_basic_test_template(spec, code_analysis)

# Build context from code analysis
context = ""
if code_analysis.get("functions"):
Expand Down Expand Up @@ -351,13 +347,13 @@ async def _generate_pytest_tests(
Provide ONLY the test code, no explanations."""

try:
response = await self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
response = await self.llm_provider.async_complete(
messages=[{"role": "user", "content": prompt}],
purpose=Purpose.GENERATION,
max_tokens=3000,
)

code = response.content[0].text
code = response.content

# Remove markdown code blocks
if "```" in code:
Expand Down Expand Up @@ -671,9 +667,6 @@ async def _correct_failing_tests(
Returns:
Corrected test code or None
"""
if not self.client:
return None

prompt = f"""Fix the following failing pytest tests:

Original Test Code:
Expand All @@ -696,13 +689,13 @@ async def _correct_failing_tests(
Provide ONLY the corrected test code, no explanations."""

try:
response = await self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
response = await self.llm_provider.async_complete(
messages=[{"role": "user", "content": prompt}],
purpose=Purpose.CORRECTION,
max_tokens=3000,
)

code = response.content[0].text
code = response.content

# Remove markdown code blocks
if "```" in code:
Expand Down
Loading
Loading