diff --git a/.env.example b/.env.example
index 31e2bea5..52b891df 100644
--- a/.env.example
+++ b/.env.example
@@ -1,8 +1,18 @@
 # Murphy Configuration
 # Copy this file to .env and fill in your values
 
-# === Required ===
+# === LLM Provider (at least one required) ===
 OPENAI_API_KEY=your_openai_api_key_here
+# GOOGLE_API_KEY=your_google_api_key_here
+# ANTHROPIC_API_KEY=your_anthropic_api_key_here
+# AZURE_OPENAI_KEY=your_azure_openai_key_here
+# AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+# MISTRAL_API_KEY=your_mistral_api_key_here
+# GROQ_API_KEY=your_groq_api_key_here
+# DEEPSEEK_API_KEY=your_deepseek_api_key_here
+# CEREBRAS_API_KEY=your_cerebras_api_key_here
+# OPENROUTER_API_KEY=your_openrouter_api_key_here
+# BROWSER_USE_API_KEY=your_browser_use_api_key_here
 
 # === Murphy REST API (optional, for murphy-api server) ===
 # MURPHY_API_KEY=your_murphy_api_key_here
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d44a83ea..785c3241 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,12 +4,27 @@ All notable changes to Murphy will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
-## [Unreleased]
+## [1.1.0] - 2026-04-07
 
 ### Added
+- Multi-provider LLM support: `--provider` and `--model` flags for OpenAI, Google Gemini, Anthropic Claude, Azure OpenAI, Mistral, Groq, DeepSeek, Cerebras, Ollama, OpenRouter, and Browser Use
+- Separate `--judge-provider` and `--judge-model` flags for using a different model for verdicts
+- `provider` field in REST API request models (`/analyze`, `/generate-plan`, `/execute`, `/evaluate`)
 - `--open` flag to re-open the interactive web UI for a previously completed run without re-running any tests (no browser or LLM required)
-- Step-by-step execution trace view in the UI (`View trace →`) showing each agent step with goal, evaluation, actions, screenshots, memory, and reasoning
-- Interactive agent path graph in the UI (`View graph →`) visualising the full decision tree with colour-coded nodes (success/failure evaluations) and labelled action edges; click any node to inspect step details
+- Step-by-step execution trace view in the UI (`View trace ->`) showing each agent step with goal, evaluation, actions, screenshots, memory, and reasoning
+- Interactive agent path graph in the UI (`View graph ->`) visualising the full decision tree with colour-coded nodes (success/failure evaluations) and labelled action edges; click any node to inspect step details
+- Missing signals reporting in judge verdicts (UX observations that don't affect the verdict)
+- Agent history saved as JSON per test in `output/agent_history/`
+
+### Fixed
+- UTF-8 surrogate encoding error (`ModelProviderError: 'utf-8' codec can't encode character`)
+- Occasional infinite verify loop during test execution
+- Incorrect page URLs in reports and trace visualization
+- Angry user persona double-click test failing due to unidentified element ID
+- Agent not reporting missing validation indicators
+
+### Changed
+- Removed actions column from results main page in the UI
 
 ## [1.0.0] - 2026-03-05
 
diff --git a/README.md b/README.md
index 2a7bf5b3..02d83a6a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Built on top of [browser-use](https://github.com/browser-use/browser-use) (AI br
 ## Prerequisites
 
 - Python >= 3.11
-- An OpenAI API key (`OPENAI_API_KEY`) — default model is `gpt-5-mini`
+- An LLM API key — default model is `gpt-5-mini` (OpenAI), but Murphy supports multiple providers (see [Model Providers](#model-providers))
 
 ## Which setup should I use?
 
@@ -43,10 +43,11 @@ uv run playwright install chromium
 ```bash
 cp .env.example .env
 ```
-Then set your key:
+Then set your key (at minimum one provider):
 ```
 OPENAI_API_KEY=sk-...
 ```
+See [Model Providers](#model-providers) for other providers.
 
 ## Setup (Docker)
 
@@ -179,8 +180,10 @@ The full JSON report (`evaluation_report.json`) contains structured results, act
 | `--features` | | Path to existing features markdown (skips feature discovery) |
 | `--plan` | | Path to existing YAML test plan (skips planning, goes straight to execution) |
 | `--max-tests` | `8` | Maximum number of test scenarios to generate |
-| `--model` | `gpt-5-mini` | LLM model for agent tasks |
-| `--judge-model` | `gpt-5-mini` | LLM model for judging verdicts |
+| `--provider` | `openai` | LLM provider (see [Model Providers](#model-providers)) |
+| `--model` | `gpt-5-mini` | LLM model name as it appears in the provider's docs |
+| `--judge-provider` | *(same as `--provider`)* | LLM provider for judging verdicts |
+| `--judge-model` | *(same as `--model`)* | LLM model for judging verdicts |
 | `--output-dir` | `./murphy/output` | Output directory for all generated files |
 | `--category` | | Site category hint (`ecommerce`, `saas`, `content`, `social`) |
 | `--open` | `false` | Open the interactive UI for a previously completed run (no browser or LLM required); `--url` is not needed |
@@ -220,15 +223,69 @@ See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md#rest-api-murphy-api) for full en
 
 ---
 
+## Model Providers
+
+Murphy supports multiple LLM providers via [browser-use](https://github.com/browser-use/browser-use). Use `--provider` and `--model` to select any supported provider. Model names are passed exactly as they appear in the provider's documentation — no renaming needed.
+
+```bash
+# Default: OpenAI
+uv run murphy --url https://example.com --model gpt-5-mini
+
+# Google Gemini
+uv run murphy --url https://example.com --provider google --model gemini-2.5-pro
+
+# Anthropic Claude
+uv run murphy --url https://example.com --provider anthropic --model claude-sonnet-4-20250514
+
+# Azure OpenAI
+uv run murphy --url https://example.com --provider azure --model gpt-4o
+
+# Mistral
+uv run murphy --url https://example.com --provider mistral --model mistral-large-latest
+
+# Mix providers: cheap model for agent tasks, stronger model for judging
+uv run murphy --url https://example.com \
+  --provider google --model gemini-2.5-flash \
+  --judge-provider openai --judge-model gpt-5-mini
+```
+
+Set the corresponding API key as an environment variable (see [Environment Variables](#environment-variables)).
+
+| Provider | `--provider` value | Example `--model` |
+|----------|-------------------|-------------------|
+| OpenAI | `openai` (default) | `gpt-5-mini`, `gpt-4o`, `o3` |
+| Google Gemini | `google` | `gemini-2.5-pro`, `gemini-2.5-flash` |
+| Anthropic | `anthropic` | `claude-sonnet-4-20250514`, `claude-haiku-4-5-20251001` |
+| Azure OpenAI | `azure` | `gpt-4o`, `gpt-4o-mini` |
+| Mistral | `mistral` | `mistral-large-latest`, `mistral-small-latest` |
+| Groq | `groq` | `llama3-70b-8192` |
+| DeepSeek | `deepseek` | `deepseek-chat` |
+| Cerebras | `cerebras` | `llama-3.3-70b` |
+| Ollama | `ollama` | `llama3`, `mistral` |
+| OpenRouter | `openrouter` | `meta-llama/llama-3-70b` |
+| Browser Use | `bu` | `bu-latest` |
+
+---
+
 ## Environment Variables
 
 All variables are optional unless noted. See `.env.example` for a template.
 
-### LLM Provider
-
-| Variable | Description |
-|----------|-------------|
-| `OPENAI_API_KEY` | OpenAI API key (required) |
+### LLM Providers
+
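+Set the API key for whichever provider you use (at least one is required, except for Ollama, which runs locally and needs no key). DeepSeek reads `DEEPSEEK_API_KEY`; the other providers are: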
+
+| Variable | Provider |
+|----------|----------|
+| `OPENAI_API_KEY` | OpenAI (default) |
+| `GOOGLE_API_KEY` | Google Gemini |
+| `ANTHROPIC_API_KEY` | Anthropic Claude |
+| `AZURE_OPENAI_KEY` | Azure OpenAI (also needs `AZURE_OPENAI_ENDPOINT`) |
+| `MISTRAL_API_KEY` | Mistral |
+| `GROQ_API_KEY` | Groq |
+| `CEREBRAS_API_KEY` | Cerebras |
+| `OPENROUTER_API_KEY` | OpenRouter |
+| `BROWSER_USE_API_KEY` | Browser Use |
 
 ### REST API
 
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 4259bb78..42d9758b 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -7,6 +7,7 @@ murphy/
 ├── __init__.py              # Package exports and version
 ├── __main__.py              # python -m murphy entry point
 ├── config.py                # Shared configuration constants
+├── llm.py                   # Multi-provider LLM factory (OpenAI, Google, Anthropic, etc.)
 ├── models.py                # Pydantic models (TestPlan, TestResult, JudgeVerdict, etc.)
 ├── prompts.py               # All LLM prompt text
 ├── evaluate.py              # Backward-compatible re-exports
diff --git a/murphy/__init__.py b/murphy/__init__.py
index 3b80f9ab..f8841457 100644
--- a/murphy/__init__.py
+++ b/murphy/__init__.py
@@ -1,6 +1,6 @@
 """Murphy — AI-driven website evaluation powered by browser-use."""
 
-__version__ = '1.0.0'
+__version__ = '1.1.0'
 
 from murphy.core.analysis import analyze_website as analyze_website
 from murphy.core.execution import execute_tests as execute_tests
diff --git a/murphy/api/auth.py b/murphy/api/auth.py
index 59ae6756..e8cea687 100644
--- a/murphy/api/auth.py
+++ b/murphy/api/auth.py
@@ -8,12 +8,12 @@
 
 if TYPE_CHECKING:
 	from browser_use.browser.session import BrowserSession
-	from browser_use.llm import ChatOpenAI
+	from browser_use.llm import BaseChatModel
 
 logger = logging.getLogger(__name__)
 
 
-async def detect_auth_required(browser_session: BrowserSession, llm: ChatOpenAI, url: str) -> bool:
+async def detect_auth_required(browser_session: BrowserSession, llm: BaseChatModel, url: str) -> bool:
 	"""Navigate to URL and use a passive LLM call to detect if login is required."""
 	logger.info('\n%s', '=' * 60)
 	logger.info('Checking if %s requires login...', url)
@@ -59,7 +59,7 @@ async def _get_page_text(browser_session: BrowserSession) -> tuple[str, str, str
 	return current_url, title, body
 
 
-async def _llm_classify_page(llm: ChatOpenAI, url: str, title: str, body: str, *, mode: str = 'auth_detect') -> bool:
+async def _llm_classify_page(llm: BaseChatModel, url: str, title: str, body: str, *, mode: str = 'auth_detect') -> bool:
 	"""Use a single LLM call (no agent) to classify page content.
 
 	Returns True if the page looks like authenticated/usable content.
@@ -97,7 +97,7 @@ async def _llm_classify_page(llm: ChatOpenAI, url: str, title: str, body: str, *
 
 async def wait_for_manual_login(
 	browser_session: BrowserSession,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	url: str,
 	*,
 	already_navigated: bool = False,
diff --git a/murphy/api/cli.py b/murphy/api/cli.py
index dfc4d786..f61df849 100644
--- a/murphy/api/cli.py
+++ b/murphy/api/cli.py
@@ -51,8 +51,14 @@ def main() -> int:
 	parser.add_argument('--features', help='Path to existing features markdown (skips analysis, goes to test generation)')
 	parser.add_argument('--plan', help='Path to existing YAML test plan (skips analysis + test generation)')
 	parser.add_argument('--max-tests', type=int, default=8, help='Max test scenarios (default: 8)')
-	parser.add_argument('--model', default='gpt-5-mini', help='OpenAI model for agent tasks (default: gpt-5-mini)')
-	parser.add_argument('--judge-model', default='gpt-5-mini', help='OpenAI model for judging verdicts (default: gpt-5-mini)')
+	parser.add_argument(
+		'--provider', default='openai', help='LLM provider (default: openai). e.g. google, anthropic, azure, mistral'
+	)
+	parser.add_argument(
+		'--model', default='gpt-5-mini', help='LLM model name as it appears in the provider docs (default: gpt-5-mini)'
+	)
+	parser.add_argument('--judge-provider', default=None, help='LLM provider for judging (defaults to --provider)')
+	parser.add_argument('--judge-model', default=None, help='LLM model for judging (defaults to --model)')
 	parser.add_argument('--output-dir', default='./murphy/output', help='Output directory for reports')
 	parser.add_argument(
 		'--open',
@@ -94,7 +100,6 @@ async def _async_main(args: argparse.Namespace) -> None:
 
 	from browser_use.browser.profile import BrowserProfile
 	from browser_use.browser.session import BrowserSession
-	from browser_use.llm import ChatOpenAI
 	from murphy.api.auth import detect_auth_required, wait_for_manual_login
 	from murphy.browser.patches import apply as apply_patches
 	from murphy.core.analysis import analyze_website
@@ -104,6 +109,7 @@ async def _async_main(args: argparse.Namespace) -> None:
 	from murphy.io.features_io import read_features_markdown, write_features_markdown
 	from murphy.io.fixtures import ensure_dummy_fixture_files
 	from murphy.io.test_plan_io import load_test_plan, save_test_plan
+	from murphy.llm import create_llm
 	from murphy.models import WebsiteAnalysis
 
 	# Apply patches early (idempotent)
@@ -112,8 +118,14 @@ async def _async_main(args: argparse.Namespace) -> None:
 	# Ensure dummy fixture files exist for upload testing
 	fixture_paths = ensure_dummy_fixture_files()
 
-	llm = ChatOpenAI(model=args.model)
-	judge_llm = ChatOpenAI(model=args.judge_model) if args.judge_model != args.model else None
+	llm = create_llm(args.model, provider=args.provider)
+	judge_provider = args.judge_provider or args.provider
+	judge_model = args.judge_model or args.model
+	judge_llm = (
+		create_llm(judge_model, provider=judge_provider)
+		if (judge_model != args.model or judge_provider != args.provider)
+		else None
+	)
 	output_dir = Path(args.output_dir)
 	output_dir.mkdir(parents=True, exist_ok=True)
 
diff --git a/murphy/api/request_models.py b/murphy/api/request_models.py
index 62590d1b..497511a9 100644
--- a/murphy/api/request_models.py
+++ b/murphy/api/request_models.py
@@ -23,6 +23,7 @@ class AnalyzeRequest(BaseModel):
 	url: str
 	category: str | None = None
 	goal: str | None = None
+	provider: str = 'openai'
 	model: str = 'gpt-5-mini'
 	webhook_url: str | None = None
 	async_mode: bool = Field(False, alias='async')
@@ -35,6 +36,7 @@ class GeneratePlanRequest(BaseModel):
 	analysis: Annotated[WebsiteAnalysis, BeforeValidator(_parse_json_string)]
 	max_tests: int = 8
 	goal: str | None = None
+	provider: str = 'openai'
 	model: str = 'gpt-5-mini'
 	webhook_url: str | None = None
 	async_mode: bool = Field(False, alias='async')
@@ -47,8 +49,10 @@ class ExecuteRequest(BaseModel):
 	test_plan: Annotated[TestPlan, BeforeValidator(_parse_json_string)] | None = None
 	evaluate_job_id: str | None = None
 	goal: str | None = None
+	provider: str = 'openai'
 	model: str = 'gpt-5-mini'
-	judge_model: str = 'gpt-5-mini'
+	judge_provider: str | None = None
+	judge_model: str | None = None
 	max_steps: int = 15
 	max_concurrent: int = 3
 	webhook_url: str | None = None
@@ -61,8 +65,10 @@ class EvaluateRequest(BaseModel):
 	url: str
 	goal: str | None = None
 	max_tests: int = 8
+	provider: str = 'openai'
 	model: str = 'gpt-5-mini'
-	judge_model: str = 'gpt-5-mini'
+	judge_provider: str | None = None
+	judge_model: str | None = None
 	async_mode: bool = Field(False, alias='async')
 	webhook_url: str | None = None
 
diff --git a/murphy/api/rest.py b/murphy/api/rest.py
index 775163d8..3d2c6d8a 100644
--- a/murphy/api/rest.py
+++ b/murphy/api/rest.py
@@ -61,7 +61,7 @@ async def _core_analyze(req: AnalyzeRequest) -> dict[str, Any]:
 	"""Run website analysis. Returns serialized WebsiteAnalysis dict."""
 	from murphy.core.pipeline import run_analyze
 
-	analysis = await run_analyze(req.url, req.model, goal=req.goal)
+	analysis = await run_analyze(req.url, req.model, provider=req.provider, goal=req.goal)
 	return analysis.model_dump()
 
 
@@ -69,7 +69,9 @@ async def _core_generate_plan(req: GeneratePlanRequest) -> dict[str, Any]:
 	"""Generate test plan from analysis. Returns serialized TestPlan dict."""
 	from murphy.core.pipeline import run_generate_plan
 
-	test_plan = await run_generate_plan(req.url, req.analysis, req.model, req.max_tests, goal=req.goal)
+	test_plan = await run_generate_plan(
+		req.url, req.analysis, req.model, provider=req.provider, max_tests=req.max_tests, goal=req.goal
+	)
 	return test_plan.model_dump()
 
 
@@ -93,7 +95,9 @@ async def _core_execute(req: ExecuteRequest) -> dict[str, Any]:
 		req.url,
 		test_plan,
 		req.model,
+		provider=req.provider,
 		judge_model=req.judge_model,
+		judge_provider=req.judge_provider,
 		goal=req.goal,
 		max_steps=req.max_steps,
 		max_concurrent=req.max_concurrent,
@@ -105,7 +109,7 @@ async def _core_evaluate(req: EvaluateRequest) -> dict[str, Any]:
 	"""Run exploration-first evaluation: explore site → generate test plan."""
 	from murphy.core.pipeline import run_evaluate
 
-	test_plan = await run_evaluate(req.url, req.model, req.max_tests, goal=req.goal)
+	test_plan = await run_evaluate(req.url, req.model, provider=req.provider, max_tests=req.max_tests, goal=req.goal)
 	return test_plan.model_dump()
 
 
diff --git a/murphy/core/analysis.py b/murphy/core/analysis.py
index c7da2c43..ce2f0907 100644
--- a/murphy/core/analysis.py
+++ b/murphy/core/analysis.py
@@ -4,7 +4,7 @@
 
 from browser_use import Agent
 from browser_use.browser.session import BrowserSession
-from browser_use.llm import ChatOpenAI
+from browser_use.llm import BaseChatModel
 from murphy.models import WebsiteAnalysis
 from murphy.prompts import build_analysis_prompt
 
@@ -13,7 +13,7 @@
 
 async def analyze_website(
 	url: str,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	category: str | None = None,
 	goal: str | None = None,
 	browser_session: BrowserSession | None = None,
diff --git a/murphy/core/execution.py b/murphy/core/execution.py
index 15c0d2b4..40a5b588 100644
--- a/murphy/core/execution.py
+++ b/murphy/core/execution.py
@@ -12,7 +12,7 @@
 from browser_use import Agent
 from browser_use.agent.views import AgentHistoryList
 from browser_use.browser.session import BrowserSession
-from browser_use.llm import ChatOpenAI
+from browser_use.llm import BaseChatModel
 from murphy.core.judge import murphy_judge
 from murphy.core.summary import classify_failure
 from murphy.io.report_helpers import _slugify
@@ -108,14 +108,14 @@ async def _collect_session_urls(browser_session: BrowserSession) -> list[str]:
 async def _execute_single_test(
 	url: str,
 	scenario: TestScenario,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	browser_session: BrowserSession,
 	goal: str | None,
 	fixture_paths: list[Path] | None,
 	max_steps: int,
 	index: int,
 	total: int,
-	judge_llm: ChatOpenAI | None = None,
+	judge_llm: BaseChatModel | None = None,
 	output_dir: Path | None = None,
 ) -> TestResult:
 	"""Execute one test scenario and return its TestResult.
@@ -362,10 +362,10 @@ async def _cleanup_session_pool(sessions: list[BrowserSession], original_session
 async def execute_tests(
 	url: str,
 	test_plan: TestPlan,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	progress_state: Any = None,
 	save_callback: Callable[[list[TestResult]], None] | None = None,
-	judge_llm: ChatOpenAI | None = None,
+	judge_llm: BaseChatModel | None = None,
 	output_dir: Path | None = None,
 ) -> list[TestResult]:
 	"""Execute tests without a pre-existing session (creates its own)."""
@@ -393,7 +393,7 @@ async def execute_tests(
 async def execute_tests_with_session(
 	url: str,
 	test_plan: TestPlan,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	browser_session: BrowserSession,
 	progress_state: Any = None,
 	goal: str | None = None,
@@ -401,7 +401,7 @@ async def execute_tests_with_session(
 	max_steps: int = 15,
 	save_callback: Callable[[list[TestResult]], None] | None = None,
 	max_concurrent: int = 3,
-	judge_llm: ChatOpenAI | None = None,
+	judge_llm: BaseChatModel | None = None,
 	output_dir: Path | None = None,
 ) -> list[TestResult]:
 	"""Phase 3 execution reusing an existing browser session.
diff --git a/murphy/core/generation.py b/murphy/core/generation.py
index f665ddb8..c94ce1c7 100644
--- a/murphy/core/generation.py
+++ b/murphy/core/generation.py
@@ -5,7 +5,7 @@
 
 from browser_use import Agent
 from browser_use.browser.session import BrowserSession
-from browser_use.llm import ChatOpenAI, SystemMessage, UserMessage
+from browser_use.llm import BaseChatModel, SystemMessage, UserMessage
 from murphy.config import EXPLORE_MAX_STEPS, QUALITY_MAX_RETRIES
 from murphy.core.quality import plan_quality_issues
 from murphy.models import TestPlan
@@ -17,7 +17,7 @@
 async def generate_tests(
 	url: str,
 	analysis: 'Any',
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	max_tests: int,
 	goal: str | None = None,
 ) -> TestPlan:
@@ -78,7 +78,7 @@ async def generate_tests(
 async def explore_and_generate_plan(
 	task: str,
 	url: str,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	session: BrowserSession,
 	max_scenarios: int = 8,
 	max_steps: int = 30,
diff --git a/murphy/core/judge.py b/murphy/core/judge.py
index b1ab0f01..99914ee7 100644
--- a/murphy/core/judge.py
+++ b/murphy/core/judge.py
@@ -10,7 +10,7 @@
 """
 
 from browser_use.agent.views import AgentHistoryList
-from browser_use.llm import ChatOpenAI, SystemMessage, UserMessage
+from browser_use.llm import BaseChatModel, SystemMessage, UserMessage
 from browser_use.llm.messages import ContentPartImageParam, ContentPartTextParam, ImageURL
 from browser_use.utils import sanitize_surrogates
 from murphy.models import (
@@ -151,6 +151,7 @@ def build_judge_trait_context(persona: str, traits: TraitVector, test_type: Test
 
 
 
+
 ## Failure classification
 
 If verdict is FALSE, you MUST also classify the failure:
@@ -326,10 +327,10 @@ def _format_pages_reached(history: AgentHistoryList) -> str:
 async def murphy_judge(
 	history: AgentHistoryList,
 	scenario: TestScenario,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 	start_url: str = '',
 	*,
-	judge_llm: ChatOpenAI | None = None,
+	judge_llm: BaseChatModel | None = None,
 ) -> JudgeVerdict:
 	"""Evaluate agent success based on action trace, not self-report.
 
diff --git a/murphy/core/pipeline.py b/murphy/core/pipeline.py
index c4472c83..111cadaa 100644
--- a/murphy/core/pipeline.py
+++ b/murphy/core/pipeline.py
@@ -14,7 +14,6 @@
 
 from browser_use.browser.profile import BrowserProfile
 from browser_use.browser.session import BrowserSession
-from browser_use.llm import ChatOpenAI
 from murphy.browser.patches import apply as apply_patches
 from murphy.evaluate import (
 	analyze_website,
@@ -24,18 +23,20 @@
 	generate_tests,
 )
 from murphy.io.fixtures import ensure_dummy_fixture_files
+from murphy.llm import create_llm
 from murphy.models import ReportSummary, TestPlan, TestResult, WebsiteAnalysis
 
 
 async def run_analyze(
 	url: str,
 	model: str,
+	provider: str = 'openai',
 	goal: str | None = None,
 	browser_session: BrowserSession | None = None,
 ) -> WebsiteAnalysis:
 	"""Run website analysis (feature discovery)."""
 	apply_patches()
-	llm = ChatOpenAI(model=model)
+	llm = create_llm(model, provider=provider)
 	own_session = browser_session is None
 	if own_session:
 		browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, keep_alive=False))
@@ -51,12 +52,13 @@ async def run_generate_plan(
 	url: str,
 	analysis: WebsiteAnalysis,
 	model: str,
+	provider: str = 'openai',
 	max_tests: int = 8,
 	goal: str | None = None,
 ) -> TestPlan:
 	"""Generate test plan from analysis."""
 	apply_patches()
-	llm = ChatOpenAI(model=model)
+	llm = create_llm(model, provider=provider)
 	return await generate_tests(url, analysis, llm, max_tests, goal=goal)
 
 
@@ -64,7 +66,9 @@ async def run_execute(
 	url: str,
 	test_plan: TestPlan,
 	model: str,
+	provider: str = 'openai',
 	judge_model: str | None = None,
+	judge_provider: str | None = None,
 	goal: str | None = None,
 	max_steps: int = 15,
 	max_concurrent: int = 3,
@@ -78,8 +82,10 @@ async def run_execute(
 	apply_patches()
 	if fixture_paths is None:
 		fixture_paths = ensure_dummy_fixture_files()
-	llm = ChatOpenAI(model=model)
-	judge_llm = ChatOpenAI(model=judge_model) if judge_model and judge_model != model else None
+	llm = create_llm(model, provider=provider)
+	jp = judge_provider or provider
+	jm = judge_model or model
+	judge_llm = create_llm(jm, provider=jp) if (jm != model or jp != provider) else None
 	own_session = browser_session is None
 	if own_session:
 		browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, keep_alive=False))
@@ -107,6 +113,7 @@ async def run_execute(
 async def run_evaluate(
 	url: str,
 	model: str,
+	provider: str = 'openai',
 	max_tests: int = 8,
 	goal: str | None = None,
 	browser_session: BrowserSession | None = None,
@@ -114,7 +121,7 @@ async def run_evaluate(
 	"""Exploration-first: explore site then generate test plan."""
 	apply_patches()
 	task = goal or f'Evaluate the website at {url}'
-	llm = ChatOpenAI(model=model)
+	llm = create_llm(model, provider=provider)
 	own_session = browser_session is None
 	if own_session:
 		browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, keep_alive=False))
diff --git a/murphy/core/summary.py b/murphy/core/summary.py
index 1177a2ce..5a805938 100644
--- a/murphy/core/summary.py
+++ b/murphy/core/summary.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Literal
 
-from browser_use.llm import ChatOpenAI, SystemMessage, UserMessage
+from browser_use.llm import BaseChatModel, SystemMessage, UserMessage
 from browser_use.utils import sanitize_surrogates
 from murphy.io.report import write_full_report
 from murphy.models import (
@@ -71,7 +71,7 @@ async def generate_executive_summary(
 	analysis: WebsiteAnalysis,
 	results: list[TestResult],
 	summary: ReportSummary,
-	llm: ChatOpenAI,
+	llm: BaseChatModel,
 ) -> ExecutiveSummary:
 	"""Generate an LLM-powered executive summary of the evaluation results."""
 	results_summary_parts: list[str] = []
diff --git a/murphy/llm.py b/murphy/llm.py
new file mode 100644
index 00000000..32d7697f
--- /dev/null
+++ b/murphy/llm.py
@@ -0,0 +1,69 @@
+"""Murphy — LLM factory supporting multiple providers via browser_use."""
+
+import os
+from typing import Any
+
+from browser_use.llm import BaseChatModel
+
+# Provider key -> (module path, chat class name, API-key env var or None when no key is read).
+_PROVIDER_MAP: dict[str, tuple[str, str, str | None]] = {
+	'openai': ('browser_use.llm.openai.chat', 'ChatOpenAI', 'OPENAI_API_KEY'),
+	'anthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic', 'ANTHROPIC_API_KEY'),
+	'google': ('browser_use.llm.google.chat', 'ChatGoogle', 'GOOGLE_API_KEY'),
+	'azure': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI', 'AZURE_OPENAI_KEY'),
+	'mistral': ('browser_use.llm.mistral.chat', 'ChatMistral', 'MISTRAL_API_KEY'),
+	'groq': ('browser_use.llm.groq.chat', 'ChatGroq', 'GROQ_API_KEY'),
+	'deepseek': ('browser_use.llm.deepseek.chat', 'ChatDeepSeek', 'DEEPSEEK_API_KEY'),
+	'cerebras': ('browser_use.llm.cerebras.chat', 'ChatCerebras', 'CEREBRAS_API_KEY'),
+	'ollama': ('browser_use.llm.ollama.chat', 'ChatOllama', None),
+	'openrouter': ('browser_use.llm.openrouter.chat', 'ChatOpenRouter', 'OPENROUTER_API_KEY'),
+	'bu': ('browser_use.llm.browser_use.chat', 'ChatBrowserUse', 'BROWSER_USE_API_KEY'),
+}
+
+SUPPORTED_PROVIDERS = sorted(_PROVIDER_MAP.keys())
+
+
+def create_llm(model: str, provider: str = 'openai') -> BaseChatModel:
+	"""Create an LLM instance from a provider name and model string.
+
+	The model name is passed directly to the provider — use exact names as
+	they appear in the provider's docs (e.g. 'gemini-2.5-pro', 'claude-sonnet-4-20250514').
+
+	Args:
+		model: Model identifier, forwarded unchanged as the ``model`` keyword.
+		provider: Provider key; matched case-insensitively and ignoring
+			surrounding whitespace. Defaults to ``'openai'``.
+
+	Returns:
+		An instance of the chat class registered for the provider.
+
+	Raises:
+		ValueError: If ``provider`` is not one of ``SUPPORTED_PROVIDERS``.
+	"""
+	# Accept 'OpenAI' or ' google ' as well as the canonical lowercase keys.
+	provider_key = provider.strip().lower() if isinstance(provider, str) else provider
+	if provider_key not in _PROVIDER_MAP:
+		raise ValueError(f"Unknown provider: '{provider}'. Supported: {', '.join(SUPPORTED_PROVIDERS)}")
+
+	module_path, class_name, api_key_env = _PROVIDER_MAP[provider_key]
+
+	# Bound at call time so that patching ``importlib.import_module``
+	# (as tests/murphy/test_llm.py does) takes effect here.
+	from importlib import import_module
+
+	cls = getattr(import_module(module_path), class_name)
+
+	kwargs: dict[str, Any] = {'model': model}
+	if api_key_env:
+		api_key = os.getenv(api_key_env)
+		# Leave api_key out when the variable is unset or empty.
+		if api_key:
+			kwargs['api_key'] = api_key
+
+	# Azure additionally takes its endpoint from AZURE_OPENAI_ENDPOINT.
+	if provider_key == 'azure':
+		azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
+		if azure_endpoint:
+			kwargs['azure_endpoint'] = azure_endpoint
+
+	return cls(**kwargs)
diff --git a/pyproject.toml b/pyproject.toml
index cdf2f558..3ee53cb2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "murphy"
 description = "AI-driven website evaluation powered by browser-use"
 authors = [{ name = "MIH AI B.V." }]
-version = "1.0.0"
+version = "1.1.0"
 readme = "README.md"
 requires-python = ">=3.11,<4.0"
 classifiers = [
diff --git a/tests/murphy/test_llm.py b/tests/murphy/test_llm.py
new file mode 100644
index 00000000..aa377476
--- /dev/null
+++ b/tests/murphy/test_llm.py
@@ -0,0 +1,205 @@
+"""Tests for murphy.llm — multi-provider LLM factory and CLI/API integration."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from murphy.llm import SUPPORTED_PROVIDERS, create_llm
+
+# ─── Provider resolution ────────────────────────────────────────────────────
+
+
+def test_unknown_provider_raises_with_suggestions():
+	"""An unknown provider name raises ValueError that lists the supported providers."""
+	with pytest.raises(ValueError, match='Unknown provider.*Supported'):
+		create_llm('some-model', provider='nonexistent')
+
+
+def test_supported_providers_list_is_complete():
+	"""SUPPORTED_PROVIDERS should include all major providers."""
+	expected = {'openai', 'google', 'anthropic', 'azure', 'mistral', 'groq', 'deepseek', 'cerebras', 'ollama', 'openrouter', 'bu'}
+	assert expected == set(SUPPORTED_PROVIDERS)
+
+
+# ─── API key from env ───────────────────────────────────────────────────────
+
+
+def test_api_key_passed_from_env():
+	"""create_llm should read the provider's env var and pass it to the constructor."""
+	mock_cls = MagicMock()
+	with (
+		patch.dict('os.environ', {'GOOGLE_API_KEY': 'test-key-123'}),
+		patch('importlib.import_module') as mock_import,
+	):
+		mock_import.return_value = MagicMock(ChatGoogle=mock_cls)
+		create_llm('gemini-2.5-pro', provider='google')
+	mock_cls.assert_called_once_with(model='gemini-2.5-pro', api_key='test-key-123')
+
+
+def test_api_key_omitted_when_not_set():
+	"""When env var is unset, api_key should not be passed (let SDK use its own default)."""
+	mock_cls = MagicMock()
+	# clear=True runs the call against an empty environment.
+	with (
+		patch.dict('os.environ', {}, clear=True),
+		patch('importlib.import_module') as mock_import,
+	):
+		mock_import.return_value = MagicMock(ChatOpenAI=mock_cls)
+		create_llm('gpt-5-mini', provider='openai')
+	mock_cls.assert_called_once_with(model='gpt-5-mini')
+
+
+def test_azure_passes_endpoint_from_env():
+	"""Azure provider should pass azure_endpoint from AZURE_OPENAI_ENDPOINT."""
+	mock_cls = MagicMock()
+	azure_env = {'AZURE_OPENAI_KEY': 'az-key', 'AZURE_OPENAI_ENDPOINT': 'https://my.azure.com'}
+	with (
+		patch.dict('os.environ', azure_env),
+		patch('importlib.import_module') as mock_import,
+	):
+		mock_import.return_value = MagicMock(ChatAzureOpenAI=mock_cls)
+		create_llm('gpt-4o', provider='azure')
+	mock_cls.assert_called_once_with(model='gpt-4o', api_key='az-key', azure_endpoint='https://my.azure.com')
+
+
+def test_ollama_no_api_key():
+	"""Ollama is local — should never pass an api_key."""
+	mock_cls = MagicMock()
+	with patch('importlib.import_module') as mock_import:
+		mock_import.return_value = MagicMock(ChatOllama=mock_cls)
+		create_llm('llama3', provider='ollama')
+	mock_cls.assert_called_once_with(model='llama3')
+
+
+# ─── CLI arg parsing ────────────────────────────────────────────────────────
+
+
+def test_cli_provider_defaults():
+	"""--provider defaults to 'openai', --judge-provider defaults to None."""
+	args = _build_parser().parse_args(['--url', 'https://example.com'])
+	assert args.provider == 'openai'
+	assert args.judge_provider is None
+	assert args.judge_model is None
+
+
+def test_cli_provider_override():
+	"""Explicit --provider and --model values replace the defaults."""
+	argv = ['--url', 'https://example.com', '--provider', 'google', '--model', 'gemini-2.5-pro']
+	args = _build_parser().parse_args(argv)
+	assert args.provider == 'google'
+	assert args.model == 'gemini-2.5-pro'
+
+
+def test_cli_judge_provider_independent():
+	"""Judge flags are parsed separately from the main provider/model flags."""
+	argv = [
+		'--url',
+		'https://example.com',
+		'--provider',
+		'google',
+		'--model',
+		'gemini-2.5-flash',
+		'--judge-provider',
+		'openai',
+		'--judge-model',
+		'gpt-5-mini',
+	]
+	args = _build_parser().parse_args(argv)
+	assert args.provider == 'google'
+	assert args.model == 'gemini-2.5-flash'
+	assert args.judge_provider == 'openai'
+	assert args.judge_model == 'gpt-5-mini'
+
+
+# ─── REST API request models ────────────────────────────────────────────────
+
+
+def test_analyze_request_provider_default():
+	"""AnalyzeRequest falls back to the openai / gpt-5-mini defaults."""
+	from murphy.api.request_models import AnalyzeRequest
+
+	req = AnalyzeRequest.model_validate({'url': 'https://example.com'})
+	assert req.provider == 'openai'
+	assert req.model == 'gpt-5-mini'
+
+
+def test_execute_request_judge_provider_defaults_to_none():
+	"""ExecuteRequest leaves the judge fields as None when they are not supplied."""
+	from murphy.api.request_models import ExecuteRequest
+
+	req = ExecuteRequest.model_validate({'url': 'https://example.com'})
+	assert req.provider == 'openai'
+	assert req.judge_provider is None
+	assert req.judge_model is None
+
+
+def test_execute_request_custom_providers():
+	"""ExecuteRequest keeps explicitly supplied main and judge provider/model values."""
+	from murphy.api.request_models import ExecuteRequest
+
+	payload = {
+		'url': 'https://example.com',
+		'provider': 'google',
+		'model': 'gemini-2.5-pro',
+		'judge_provider': 'anthropic',
+		'judge_model': 'claude-sonnet-4-20250514',
+	}
+	req = ExecuteRequest.model_validate(payload)
+	assert req.provider == 'google'
+	assert req.model == 'gemini-2.5-pro'
+	assert req.judge_provider == 'anthropic'
+	assert req.judge_model == 'claude-sonnet-4-20250514'
+
+
+def test_evaluate_request_provider_field():
+	"""EvaluateRequest accepts a non-default provider."""
+	from murphy.api.request_models import EvaluateRequest
+
+	req = EvaluateRequest.model_validate({'url': 'https://example.com', 'provider': 'mistral', 'model': 'mistral-large-latest'})
+	assert req.provider == 'mistral'
+
+
+# ─── Judge LLM creation logic ───────────────────────────────────────────────
+
+
+def test_judge_llm_none_when_same_as_main():
+	"""When judge provider+model match main, no separate judge LLM should be created."""
+	assert _needs_separate_judge('openai', 'gpt-5-mini') is False
+	assert _needs_separate_judge('openai', 'gpt-5-mini', 'openai', 'gpt-5-mini') is False
+
+
+def test_judge_llm_created_when_different_provider():
+	"""A different judge provider requires its own LLM instance."""
+	assert _needs_separate_judge('openai', 'gpt-5-mini', 'google', 'gemini-2.5-pro') is True
+
+
+def test_judge_llm_created_when_different_model_same_provider():
+	"""A different judge model on the same provider requires its own LLM instance."""
+	assert _needs_separate_judge('openai', 'gpt-5-mini', 'openai', 'gpt-5') is True
+
+
+# ─── Helpers ─────────────────────────────────────────────────────────────────
+
+
+def _needs_separate_judge(provider: str, model: str, judge_provider: str | None = None, judge_model: str | None = None) -> bool:
+	"""Replicate the judge-LLM decision made in cli.py and pipeline.run_execute.
+
+	NOTE(review): this copies the production expression instead of calling it, so
+	these tests will not catch a regression there — consider testing run_execute.
+	"""
+	effective_provider = judge_provider or provider
+	effective_model = judge_model or model
+	return effective_model != model or effective_provider != provider
+
+
+def _build_parser():
+	"""Build the Murphy CLI parser for testing (mirrors cli.py's arg definitions)."""
+	import argparse
+
+	parser = argparse.ArgumentParser()
+	parser.add_argument('--url')
+	parser.add_argument('--provider', default='openai')
+	parser.add_argument('--model', default='gpt-5-mini')
+	parser.add_argument('--judge-provider', default=None)
+	parser.add_argument('--judge-model', default=None)
+	return parser
diff --git a/uv.lock b/uv.lock
index 73119b79..db394a95 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1906,7 +1906,7 @@ wheels = [
 
 [[package]]
 name = "murphy"
-version = "1.0.0"
+version = "1.1.0"
 source = { editable = "." }
 dependencies = [
     { name = "aiohttp" },