
Commit 5e8ee5c

ochafik and claude committed
refactor(mcp): inline MCP bridge, use streaming HTTP proxy
Simplify MCP proxy architecture by removing custom transport layers and using the existing server_http_proxy for streaming responses.

Changes:
- Remove server-mcp-bridge.cpp/h (custom SSE+HTTP POST transport)
- Remove server-ws.cpp/h (WebSocket transport, unused)
- Remove mcp-transport-custom.ts (frontend custom transport)
- Use server_http_proxy to stream responses (fixes SSE forwarding)
- Fix endpoint mismatch: client now uses /mcp?server=... consistently
- Fix JSON injection in error responses (use json{}.dump())
- Add port validation with proper error handling
- Add CORS headers for browser MCP clients
- Fix memory leaks in frontend MCP store disconnect
- Simplify tests to match new HTTP proxy model
- Update README and example config

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 2724d3f commit 5e8ee5c

22 files changed: 549 additions & 2054 deletions

PR.md

Lines changed: 0 additions & 93 deletions
This file was deleted.

tools/server/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
```diff
@@ -46,10 +46,6 @@ set(TARGET_SRCS
     server-common.h
     server-context.cpp
     server-context.h
-    server-ws.cpp
-    server-ws.h
-    server-mcp-bridge.cpp
-    server-mcp-bridge.h
 )
 set(PUBLIC_ASSETS
     index.html.gz
```

tools/server/README.md

Lines changed: 47 additions & 30 deletions
````diff
@@ -1681,26 +1681,25 @@ Apart from error types supported by OAI, we also have custom types that are spec
 
 ### MCP (Model Context Protocol) Support
 
-The server supports [MCP](https://modelcontextprotocol.io/) for integrating external tools via WebSocket. MCP enables models to interact with external services like file systems, databases, APIs, and more.
+The server supports [MCP](https://modelcontextprotocol.io/) for integrating external tools. MCP enables models to interact with external services like file systems, databases, APIs, and more.
+
+The server acts as an HTTP proxy for remote MCP servers, handling CORS for browser-based clients.
 
 #### MCP Configuration
 
-Create an MCP configuration file (JSON format):
+Create an MCP configuration file (JSON format) with remote MCP server URLs:
 
 ```json
 {
   "mcpServers": {
-    "filesystem": {
-      "command": "npx",
-      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/allowed/dir"],
-      "env": {}
-    },
     "brave-search": {
-      "command": "npx",
-      "args": ["-y", "@anthropic/mcp-server-brave-search"],
-      "env": {
-        "BRAVE_API_KEY": "your-api-key"
+      "url": "http://127.0.0.1:38180/mcp",
+      "headers": {
+        "Authorization": "Bearer your-api-key"
       }
+    },
+    "filesystem": {
+      "url": "http://127.0.0.1:38181/mcp"
     }
   }
 }
@@ -1717,38 +1716,56 @@ The server looks for MCP configuration in the following order:
 #### MCP Usage
 
 ```bash
-# Use default config location (~/.llama.cpp/mcp.json)
-./llama-server -m model.gguf
+# Enable MCP with --webui-mcp flag
+./llama-server -m model.gguf --webui-mcp
 
-# Or specify config path
-./llama-server -m model.gguf --mcp-config /path/to/mcp.json
+# Specify config path
+./llama-server -m model.gguf --webui-mcp --mcp-config /path/to/mcp.json
 
 # Or use environment variable
-LLAMA_MCP_CONFIG=/path/to/mcp.json ./llama-server -m model.gguf
+LLAMA_MCP_CONFIG=/path/to/mcp.json ./llama-server -m model.gguf --webui-mcp
 ```
 
-#### MCP WebSocket Port
-
-MCP uses WebSocket on HTTP port + 1 (default: 8081 when HTTP is on 8080).
-
 #### MCP API Endpoints
 
-| Endpoint | Description |
-|----------|-------------|
-| `GET /mcp/servers` | List available MCP servers from configuration |
-| `WS /mcp?server=<name>` | WebSocket connection (on HTTP port + 1) |
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/mcp/servers` | GET | List available MCP server names from config |
+| `/mcp?server=<name>` | GET | Proxy GET requests to remote MCP server (SSE streams) |
+| `/mcp?server=<name>` | POST | Proxy POST requests to remote MCP server (JSON-RPC) |
 
 #### MCP Protocol
 
-The MCP bridge implements JSON-RPC 2.0 over WebSocket. Key methods:
-- `initialize` - Establish MCP session
-- `tools/list` - List available tools
-- `tools/call` - Execute a tool
-- `resources/list` - List available resources
-- `resources/read` - Read a resource
+The server proxies requests to remote MCP servers using the [Streamable HTTP transport](https://modelcontextprotocol.io/specification/2025-11-25/basic/transports). The web UI uses the official `@modelcontextprotocol/sdk` client.
 
 For more information about MCP, see the [Model Context Protocol documentation](https://modelcontextprotocol.io/).
 
+#### Example MCP Servers
+
+Here's how to run some example MCP servers that work with the default config:
+
+**Brave Search** (requires `BRAVE_API_KEY` environment variable - get one at https://brave.com/search/api/):
+
+```bash
+BRAVE_API_KEY=your-key-here npx -y @anthropic-ai/mcp-server-brave-search --transport http --port 38180
+```
+
+**Python interpreter** (with common data science packages):
+
+```bash
+uvx mcp-run-python --deps numpy,pandas,pydantic,requests,httpx,sympy,aiohttp streamable-http --port 38181
+```
+
+**Run both together** using `concurrently`:
+
+```bash
+BRAVE_API_KEY=your-key-here npx -y concurrently \
+  "npx -y @anthropic-ai/mcp-server-brave-search --transport http --port 38180" \
+  "uvx mcp-run-python --deps numpy,pandas,pydantic,requests,httpx,sympy,aiohttp streamable-http --port 38181"
+```
+
+Then update `mcp_config.example.json` with your settings and start llama-server with `--webui-mcp`.
+
 ### Legacy completion web UI
 
 A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggml-org/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./tools/server/public_legacy`
````
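The endpoint table above says GET requests are proxied to the remote MCP server as SSE streams, which is why the commit switches to a streaming proxy: SSE delivers a sequence of events, each terminated by a blank line, and the client expects each frame as soon as it completes rather than after the upstream closes. A rough illustrative sketch of that framing (not the server's actual code):

```cpp
#include <string>
#include <vector>

// Illustrative SSE framing: each event is terminated by a blank line ("\n\n").
// A buffering proxy would hold every frame until the upstream disconnects;
// a streaming proxy forwards each frame the moment it is complete.
static std::vector<std::string> split_sse_events(const std::string & buf) {
    std::vector<std::string> events;
    size_t start = 0;
    size_t pos;
    while ((pos = buf.find("\n\n", start)) != std::string::npos) {
        events.push_back(buf.substr(start, pos - start));
        start = pos + 2;
    }
    // Bytes after the last "\n\n" belong to a not-yet-complete event.
    return events;
}
```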

tools/server/mcp_config.example.json

Lines changed: 10 additions & 33 deletions
```diff
@@ -4,42 +4,19 @@
   "_comment_macos": "On macOS/Linux, place this file in ~/.llama.cpp/mcp.json",
   "_comment_env": "Or set the LLAMA_MCP_CONFIG environment variable to point to this file",
   "mcpServers": {
-    "filesystem": {
-      "command": "npx",
-      "args": [
-        "-y",
-        "@modelcontextprotocol/server-filesystem",
-        "/allowed/path"
-      ]
-    },
+    "_comment": "Remote MCP servers (proxied via C++ backend with CORS support)",
     "brave-search": {
-      "command": "npx",
-      "args": [
-        "-y",
-        "@modelcontextprotocol/server-brave-search"
-      ],
-      "env": {
-        "BRAVE_API_KEY": "your-api-key-here"
-      }
+      "_comment": "Run: BRAVE_API_KEY=... npx -y @anthropic-ai/mcp-server-brave-search --transport http --port 38180",
+      "_comment_key": "Get your API key at https://brave.com/search/api/",
+      "url": "http://127.0.0.1:38180/mcp"
     },
-    "github": {
-      "command": "npx",
-      "args": [
-        "-y",
-        "@modelcontextprotocol/server-github"
-      ],
-      "env": {
-        "GITHUB_TOKEN": "your-github-token-here"
-      }
+    "python": {
+      "_comment": "Run: uvx mcp-run-python --deps numpy,pandas,pydantic,requests,httpx,sympy,aiohttp streamable-http --port 38181",
+      "url": "http://127.0.0.1:38181/mcp"
     },
-    "_comment_cwd_example": "Example: Run a custom MCP server script from a specific directory",
-    "my-script": {
-      "command": "python",
-      "args": ["server.py"],
-      "cwd": "/path/to/working/directory",
-      "env": {
-        "PYTHONUNBUFFERED": "1"
-      }
+    "filesystem": {
+      "_comment": "Run: npx -y @anthropic-ai/mcp-server-filesystem --transport http --port 38182 /path/to/allowed/dir",
+      "url": "http://127.0.0.1:38182/mcp"
     }
   }
 }
```
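At request time, the server resolves the `?server=<name>` query value against the `mcpServers` entries in this config before proxying, and the commit message notes that it now validates upstream ports with proper error handling. A hypothetical sketch of those two checks (the function names and error handling here are assumptions, not the server's actual API):

```cpp
#include <map>
#include <stdexcept>
#include <string>

// Hypothetical helper: map a ?server=<name> query value to its configured
// upstream URL, rejecting unknown names instead of proxying blindly.
static std::string resolve_mcp_url(const std::map<std::string, std::string> & servers,
                                   const std::string & name) {
    auto it = servers.find(name);
    if (it == servers.end()) {
        throw std::invalid_argument("unknown MCP server: " + name);
    }
    return it->second;
}

// Valid TCP ports are 1..65535; anything else is a configuration error.
static bool is_valid_port(long port) {
    return port >= 1 && port <= 65535;
}
```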

tools/server/public/index.html.gz

7.92 KB (binary file not shown)

tools/server/server-common.cpp

Lines changed: 0 additions & 58 deletions
```diff
@@ -57,64 +57,6 @@ json format_error_response(const std::string & message, const enum error_type ty
     };
 }
 
-//
-// API key validation helpers
-//
-
-std::string extract_api_key_from_auth_header(const std::string & auth_header) {
-    std::string req_api_key = auth_header;
-
-    // Remove the "Bearer " prefix if needed
-    std::string prefix = "Bearer ";
-    if (req_api_key.length() >= prefix.length() && req_api_key.substr(0, prefix.length()) == prefix) {
-        req_api_key = req_api_key.substr(prefix.length());
-    }
-
-    // Trim leading whitespace
-    while (!req_api_key.empty() && req_api_key[0] == ' ') {
-        req_api_key.erase(0, 1);
-    }
-
-    return req_api_key;
-}
-
-// Constant-time string comparison to prevent timing attacks
-// Returns true if strings are equal, false otherwise
-static bool constant_time_compare(const std::string & a, const std::string & b) {
-    if (a.size() != b.size()) {
-        return false;
-    }
-
-    // Use XOR to compare all bytes without early exit
-    volatile unsigned char result = 0;
-    for (size_t i = 0; i < a.size(); i++) {
-        result |= (a[i] ^ b[i]);
-    }
-
-    return result == 0;
-}
-
-bool validate_auth_header(const std::string & auth_header, const std::vector<std::string> & api_keys) {
-    // If API key is not set, skip validation
-    if (api_keys.empty()) {
-        return true;
-    }
-
-    // Extract the API key from the Authorization header
-    std::string req_api_key = extract_api_key_from_auth_header(auth_header);
-
-    // Validate the API key using constant-time comparison
-    // This prevents timing attacks where an attacker could measure
-    // response times to guess valid API key characters
-    for (const auto & key : api_keys) {
-        if (constant_time_compare(req_api_key, key)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
 //
 // random string / id
 //
```

tools/server/server-common.h

Lines changed: 0 additions & 15 deletions
```diff
@@ -86,21 +86,6 @@ struct server_grammar_trigger {
 
 json format_error_response(const std::string & message, const enum error_type type);
 
-//
-// API key validation helpers
-//
-
-// Validates an Authorization header value against a list of configured API keys.
-// Handles "Bearer " prefix and X-Api-Key header format.
-// Returns true if:
-//   - api_keys is empty (no authentication configured)
-//   - the provided auth_header matches one of the configured keys
-// Uses constant-time comparison to prevent timing attacks.
-bool validate_auth_header(const std::string & auth_header, const std::vector<std::string> & api_keys);
-
-// Extracts the API key from an Authorization header value (removes "Bearer " prefix if present)
-std::string extract_api_key_from_auth_header(const std::string & auth_header);
-
 //
 // random string / id
 //
```
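The commit also adds CORS headers so browser-based MCP clients can call the proxy endpoints cross-origin. The header names below are standard, but the exact values the server sends are an assumption; this is a sketch of the shape of the response headers, not the server's handler:

```cpp
#include <map>
#include <string>

// Sketch: response headers a browser needs before its MCP client may call
// /mcp?server=... from another origin. Values here are illustrative.
static std::map<std::string, std::string> mcp_cors_headers() {
    return {
        {"Access-Control-Allow-Origin",  "*"},
        {"Access-Control-Allow-Methods", "GET, POST, OPTIONS"},
        // Authorization carries bearer tokens; Mcp-Session-Id is the header
        // the MCP Streamable HTTP transport uses to correlate requests.
        {"Access-Control-Allow-Headers", "Content-Type, Authorization, Mcp-Session-Id"},
    };
}
```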
