Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 247 additions & 0 deletions src/app/endpoints/mcp_servers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
"""Handler for REST API calls to dynamically manage MCP servers."""

from typing import Annotated, Any

from fastapi import APIRouter, Depends, HTTPException, Request, status
from llama_stack_client import APIConnectionError

from authentication import get_auth_dependency
from authentication.interface import AuthTuple
from authorization.middleware import authorize
from client import AsyncLlamaStackClientHolder
from configuration import configuration
from models.config import Action, ModelContextProtocolServer
from models.requests import MCPServerRegistrationRequest
from models.responses import (
ConflictResponse,
ForbiddenResponse,
InternalServerErrorResponse,
MCPServerDeleteResponse,
MCPServerInfo,
MCPServerListResponse,
MCPServerRegistrationResponse,
NotFoundResponse,
ServiceUnavailableResponse,
UnauthorizedResponse,
)
from utils.endpoints import check_configuration_loaded
from log import get_logger

logger = get_logger(__name__)
router = APIRouter(tags=["mcp-servers"])


register_responses: dict[int | str, dict[str, Any]] = {
201: MCPServerRegistrationResponse.openapi_response(),
401: UnauthorizedResponse.openapi_response(
examples=["missing header", "missing token"]
),
403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
409: ConflictResponse.openapi_response(examples=["mcp server"]),
500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
503: ServiceUnavailableResponse.openapi_response(),
}


@router.post(
"/mcp-servers",
responses=register_responses,
status_code=status.HTTP_201_CREATED,
)
@authorize(Action.REGISTER_MCP_SERVER)
async def register_mcp_server_handler(
request: Request,
body: MCPServerRegistrationRequest,
auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
) -> MCPServerRegistrationResponse:
"""Register an MCP server dynamically at runtime.
Adds the MCP server to the runtime configuration and registers it
as a toolgroup with Llama Stack so it becomes available for queries.
Raises:
HTTPException: On duplicate name, Llama Stack connection error,
or registration failure.
Returns:
MCPServerRegistrationResponse: Details of the newly registered server.
"""
_ = auth
_ = request

check_configuration_loaded(configuration)

mcp_server = ModelContextProtocolServer(
name=body.name,
url=body.url,
provider_id=body.provider_id,
authorization_headers=body.authorization_headers or {},
headers=body.headers or [],
timeout=body.timeout,
)
Comment on lines +74 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

For llama-stack-client 0.5.2, what fields are supported inside AsyncLlamaStackClient.toolgroups.register(..., mcp_endpoint=...)for MCP endpoints? Doesmcp_endpoint support headers, authorization headers, or timeout values in addition to the URI?

💡 Result:

In llama-stack-client 0.5.2, AsyncLlamaStackClient.toolgroups.register(..., mcp_endpoint=...) uses the URL schema, and the only supported field inside mcp_endpoint is:

  • uri: str (required)

This comes from the RegisterToolGroupRequest schema (mcp_endpoint: URL | null) and the URL schema definition ({ "uri": string }). [1][2]

So, no—in 0.5.2, mcp_endpoint itself does not support additional structured fields like headers, authorization/Authorization headers, or timeouts beyond the URI. [1][2]

If you need headers/auth, note that headers and authorization are supported elsewhere in the API (e.g., the OpenAI “Responses” MCP tool config includes headers and authorization), but that is not part of toolgroups.register(..., mcp_endpoint=...). [3]

Sources
[1] Llama Stack OpenAPI (v0.5.2): RegisterToolGroupRequest schema (shows mcp_endpoint is URL)
[2] Llama Stack OpenAPI (v0.5.2): URL schema (uri only)
[3] Llama Stack OpenAPI (v0.5.2): OpenAIResponseInputToolMCP schema (has headers, authorization)


Forward the request's connection settings to Llama Stack or reject them explicitly.

The handler accepts and stores authorization_headers, headers, and timeout (lines 74-81), but the call to toolgroups.register(...) (lines 91-95) only forwards the URI via mcp_endpoint={"uri": mcp_server.url}. In llama-stack-client 0.5.2, the mcp_endpoint parameter supports only the uri field; additional connection metadata is not propagated to the remote toolgroup. This means the endpoint returns 201 while potentially creating a toolgroup that cannot authenticate to or properly connect with the MCP server if those settings are required. Either pass these fields through to Llama Stack if a future version supports them, or reject the request with a clear error if authorization_headers, headers, or a non-default timeout are provided.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/app/endpoints/mcp_servers.py` around lines 74 - 81, The handler currently
constructs a ModelContextProtocolServer (mcp_server) with authorization_headers,
headers, and timeout but only passes mcp_endpoint={"uri": mcp_server.url} into
toolgroups.register, which can create an unusable toolgroup; update the endpoint
handler to either forward these connection fields to Llama Stack when supported
or explicitly reject requests that supply them—specifically, before calling
toolgroups.register check ModelContextProtocolServer.authorization_headers,
.headers, and .timeout against defaults and if any non-empty/non-default values
are present raise a clear HTTP error (or ValidationError) explaining that
authorization_headers, headers, and custom timeout are not supported by the
current llama-stack-client; reference ModelContextProtocolServer,
toolgroups.register, and mcp_endpoint in the change so the rejection/forwarding
logic is colocated with where mcp_server is created and toolgroups.register is
called.


try:
configuration.add_mcp_server(mcp_server)
except ValueError as e:
response = ConflictResponse(resource="MCP server", resource_id=body.name)
raise HTTPException(**response.model_dump()) from e

try:
client = AsyncLlamaStackClientHolder().get_client()
await client.toolgroups.register( # pyright: ignore[reportDeprecated]
toolgroup_id=mcp_server.name,
provider_id=mcp_server.provider_id,
mcp_endpoint={"uri": mcp_server.url},
)
except APIConnectionError as e:
configuration.remove_mcp_server(body.name)
logger.error("Failed to register MCP server with Llama Stack: %s", e)
response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
raise HTTPException(**response.model_dump()) from e
except Exception as e: # pylint: disable=broad-exception-caught
configuration.remove_mcp_server(body.name)
logger.error("Failed to register MCP toolgroup: %s", e)
error_response = InternalServerErrorResponse(
response="Failed to register MCP server",
cause=str(e),
)
Comment on lines +99 to +107
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Do not return raw upstream exception text to API clients.

These branches serialize str(e) into the public error payload. That can leak internal Llama Stack details and makes this API contract depend on upstream exception wording. Keep the full exception in logs, but return a fixed cause string or opaque error id to callers.

Also applies to: 223-225

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/app/endpoints/mcp_servers.py` around lines 99 - 107, The code currently
includes raw upstream exception text (str(e)) in API responses (see
ServiceUnavailableResponse and InternalServerErrorResponse in the except blocks
around configuration.remove_mcp_server and HTTPException raising); change these
to log the full exception via logger.exception or logger.error with exception
info, but replace the response cause with a fixed opaque string (e.g.
"upstream_error" or a generated error_id) or a generic message like "internal
server error" before raising HTTPException or returning the response; apply the
same change to the other occurrence referenced (lines ~223-225) so no raw
exception text is serialized to clients while full details remain in logs for
debugging.

raise HTTPException(**error_response.model_dump()) from e

logger.info("Dynamically registered MCP server: %s at %s", body.name, body.url)

return MCPServerRegistrationResponse(
name=mcp_server.name,
url=mcp_server.url,
provider_id=mcp_server.provider_id,
message=f"MCP server '{mcp_server.name}' registered successfully",
)


list_responses: dict[int | str, dict[str, Any]] = {
200: MCPServerListResponse.openapi_response(),
401: UnauthorizedResponse.openapi_response(
examples=["missing header", "missing token"]
),
403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
}


@router.get("/mcp-servers", responses=list_responses)
@authorize(Action.LIST_MCP_SERVERS)
async def list_mcp_servers_handler(
request: Request,
auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
) -> MCPServerListResponse:
"""List all registered MCP servers.
Returns both statically configured (from YAML) and dynamically
registered (via API) MCP servers.
Raises:
HTTPException: If configuration is not loaded.
Returns:
MCPServerListResponse: List of all registered MCP servers with source info.
"""
_ = auth
_ = request

check_configuration_loaded(configuration)

servers = []
for mcp in configuration.mcp_servers:
source = "api" if configuration.is_dynamic_mcp_server(mcp.name) else "config"
servers.append(
MCPServerInfo(
name=mcp.name,
url=mcp.url,
provider_id=mcp.provider_id,
source=source,
)
)

return MCPServerListResponse(servers=servers)


delete_responses: dict[int | str, dict[str, Any]] = {
200: MCPServerDeleteResponse.openapi_response(),
401: UnauthorizedResponse.openapi_response(
examples=["missing header", "missing token"]
),
403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
404: NotFoundResponse.openapi_response(examples=["mcp server"]),
500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
503: ServiceUnavailableResponse.openapi_response(),
}


@router.delete("/mcp-servers/{name}", responses=delete_responses)
@authorize(Action.DELETE_MCP_SERVER)
async def delete_mcp_server_handler(
request: Request,
name: str,
auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
) -> MCPServerDeleteResponse:
"""Unregister a dynamically registered MCP server.
Removes the MCP server from the runtime configuration and unregisters
its toolgroup from Llama Stack. Only servers registered via the API
can be deleted; statically configured servers cannot be removed.
Raises:
HTTPException: If the server is not found, is statically configured,
or Llama Stack unregistration fails.
Returns:
MCPServerDeleteResponse: Confirmation of the deletion.
"""
_ = auth
_ = request

check_configuration_loaded(configuration)

if not configuration.is_dynamic_mcp_server(name):
found = any(s.name == name for s in configuration.mcp_servers)
if found:
response = ForbiddenResponse(
response="Cannot delete statically configured MCP server",
cause=f"MCP server '{name}' was configured in lightspeed-stack.yaml "
"and cannot be removed via the API.",
)
else:
response = NotFoundResponse(resource="MCP server", resource_id=name)
raise HTTPException(**response.model_dump())

try:
client = AsyncLlamaStackClientHolder().get_client()
await client.toolgroups.unregister( # pyright: ignore[reportDeprecated]
toolgroup_id=name
)
except APIConnectionError as e:
logger.error("Failed to unregister MCP toolgroup from Llama Stack: %s", e)
svc_response = ServiceUnavailableResponse(
backend_name="Llama Stack", cause=str(e)
)
raise HTTPException(**svc_response.model_dump()) from e
except Exception as e: # pylint: disable=broad-exception-caught
logger.warning(
"Llama Stack toolgroup unregister failed for '%s', "
"proceeding with local removal: %s",
name,
e,
)

try:
configuration.remove_mcp_server(name)
except ValueError as e:
logger.error("Failed to remove MCP server from configuration: %s", e)
response = NotFoundResponse(resource="MCP server", resource_id=name)
raise HTTPException(**response.model_dump()) from e
Comment on lines +227 to +240
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Do not report a successful delete after remote unregister fails.

The broad except Exception at Line 227 logs and continues, so the local config can be removed even though the Llama Stack toolgroup is still present. That leaves /v1/mcp-servers and the actual toolgroup state out of sync. Except for a verified remote "not found" case, this should fail the request and preserve the local registration so the delete can be retried safely.

💡 Example change
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        logger.warning(
-            "Llama Stack toolgroup unregister failed for '%s', "
-            "proceeding with local removal: %s",
-            name,
-            e,
-        )
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("Failed to unregister MCP toolgroup from Llama Stack: %s", e)
+        error_response = InternalServerErrorResponse(
+            response="Failed to unregister MCP server",
+            cause="Toolgroup unregistration failed",
+        )
+        raise HTTPException(**error_response.model_dump()) from e
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/app/endpoints/mcp_servers.py` around lines 227 - 240, The code currently
swallows all exceptions from the Llama Stack toolgroup unregister and proceeds
to call configuration.remove_mcp_server(name); change this so that only a
verified remote "not found" outcome allows local removal. Specifically, replace
the broad except Exception around the remote unregister with logic that (1)
detects the remote-not-found case and continues to call
configuration.remove_mcp_server(name), and (2) for any other exception from the
Llama Stack toolgroup unregister, log the error and raise an HTTPException (do
not call configuration.remove_mcp_server). Reference the Llama Stack toolgroup
unregister call and configuration.remove_mcp_server(name) when making the change
so the local config is preserved unless the remote toolgroup is confirmed
missing.


logger.info("Dynamically unregistered MCP server: %s", name)

return MCPServerDeleteResponse(
name=name,
message=f"MCP server '{name}' unregistered successfully",
)
2 changes: 2 additions & 0 deletions src/app/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
metrics,
tools,
mcp_auth,
mcp_servers,
# Query endpoints for Response API support
query,
# RHEL Lightspeed rlsapi v1 compatibility
Expand Down Expand Up @@ -48,6 +49,7 @@ def include_routers(app: FastAPI) -> None:
app.include_router(models.router, prefix="/v1")
app.include_router(tools.router, prefix="/v1")
app.include_router(mcp_auth.router, prefix="/v1")
app.include_router(mcp_servers.router, prefix="/v1")
app.include_router(shields.router, prefix="/v1")
app.include_router(providers.router, prefix="/v1")
app.include_router(rags.router, prefix="/v1")
Expand Down
62 changes: 62 additions & 0 deletions src/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(self) -> None:
self._conversation_cache: Optional[Cache] = None
self._quota_limiters: list[QuotaLimiter] = []
self._token_usage_history: Optional[TokenUsageHistory] = None
self._dynamic_mcp_server_names: set[str] = set()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Dynamic MCP registry is process-local and will diverge across workers.

Line 67 stores dynamic registration state only in local memory. In multi-worker deployments, requests can hit different workers and observe different MCP server sets (e.g., register succeeds but list/delete on another worker returns inconsistent results).

Also applies to: 169-228

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/configuration.py` at line 67, The dynamic MCP registry is currently
stored in the in-memory set _dynamic_mcp_server_names (and other related code
around lines 169-228), which will diverge across workers; change the
implementation so the registry is kept in a shared, persistent store (e.g.,
Redis, database table, or central config service) instead of the process-local
set. Replace direct reads/writes of _dynamic_mcp_server_names with calls to new
helper methods (e.g., get_dynamic_mcp_servers(), add_dynamic_mcp_server(name),
remove_dynamic_mcp_server(name)) that operate on the shared backend and provide
atomic operations; update any code paths that reference
_dynamic_mcp_server_names to use these helpers and ensure worker-safe
concurrency (locks or atomic commands) and consistent
serialization/deserialization. Ensure migration/initialization code populates
the shared store from any existing local state and add tests verifying
consistency across simulated multiple-worker access.

⚠️ Potential issue | 🟠 Major

Reset dynamic MCP registry on config re-initialization.

init_from_dict() clears other cached runtime state but not _dynamic_mcp_server_names. Stale names can survive reloads and incorrectly mark servers as API-managed.

🛠️ Proposed fix
     def init_from_dict(self, config_dict: dict[Any, Any]) -> None:
         """Initialize configuration from a dictionary.
@@
         # clear cached values when configuration changes
         self._conversation_cache = None
         self._quota_limiters = []
         self._token_usage_history = None
+        self._dynamic_mcp_server_names.clear()
         # now it is possible to re-read configuration
         self._configuration = Configuration(**config_dict)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/configuration.py` at line 67, init_from_dict() currently clears cached
runtime state but doesn't reset the _dynamic_mcp_server_names set, allowing
stale server names to persist across reinitialization; update init_from_dict()
to clear or reinitialize self._dynamic_mcp_server_names (e.g. set it to an empty
set) as part of the re-initialization path so dynamic MCP registry state is
reset when configuration is reloaded.


def load_configuration(self, filename: str) -> None:
"""Load configuration from YAML file.
Expand Down Expand Up @@ -165,6 +166,67 @@ def mcp_servers(self) -> list[ModelContextProtocolServer]:
raise LogicError("logic error: configuration is not loaded")
return self._configuration.mcp_servers

@property
def dynamic_mcp_server_names(self) -> set[str]:
"""Return the set of dynamically registered MCP server names.

Returns:
set[str]: Names of MCP servers added via the API (not from config file).
"""
return self._dynamic_mcp_server_names

def add_mcp_server(self, mcp_server: ModelContextProtocolServer) -> None:
"""Add an MCP server to the runtime configuration.

Parameters:
mcp_server: The MCP server configuration to add.

Raises:
LogicError: If the configuration has not been loaded.
ValueError: If an MCP server with the same name already exists.
"""
if self._configuration is None:
raise LogicError("logic error: configuration is not loaded")
for existing in self._configuration.mcp_servers:
if existing.name == mcp_server.name:
raise ValueError(
f"MCP server with name '{mcp_server.name}' already exists"
)
self._configuration.mcp_servers.append(mcp_server)
self._dynamic_mcp_server_names.add(mcp_server.name)

def remove_mcp_server(self, name: str) -> None:
"""Remove a dynamically registered MCP server from the runtime configuration.

Parameters:
name: The name of the MCP server to remove.

Raises:
LogicError: If the configuration has not been loaded.
ValueError: If the server was not found or was statically configured.
"""
if self._configuration is None:
raise LogicError("logic error: configuration is not loaded")
if name not in self._dynamic_mcp_server_names:
raise ValueError(
f"MCP server '{name}' was not dynamically registered or does not exist"
)
self._configuration.mcp_servers = [
s for s in self._configuration.mcp_servers if s.name != name
]
self._dynamic_mcp_server_names.discard(name)

def is_dynamic_mcp_server(self, name: str) -> bool:
"""Check if an MCP server was dynamically registered.

Parameters:
name: The name of the MCP server.

Returns:
bool: True if the server was registered via the API.
"""
return name in self._dynamic_mcp_server_names

@property
def authentication_configuration(self) -> AuthenticationConfiguration:
"""Return authentication configuration.
Expand Down
5 changes: 5 additions & 0 deletions src/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,6 +1019,11 @@ class Action(str, Enum):
# RHEL Lightspeed rlsapi v1 compatibility - stateless inference (no history/RAG)
RLSAPI_V1_INFER = "rlsapi_v1_infer"

# Dynamic MCP server management
REGISTER_MCP_SERVER = "register_mcp_server"
LIST_MCP_SERVERS = "list_mcp_servers"
DELETE_MCP_SERVER = "delete_mcp_server"

# A2A (Agent-to-Agent) protocol actions
A2A_AGENT_CARD = "a2a_agent_card"
A2A_TASK_EXECUTION = "a2a_task_execution"
Expand Down
Loading
Loading