From d1a653a3c384dc26cab3110af7ed59bf51a7d669 Mon Sep 17 00:00:00 2001 From: Andre Bossard Date: Wed, 18 Feb 2026 11:34:48 +0100 Subject: [PATCH 1/8] Optimize agent runtime and SLA demo flow (#15) * feat: Enhance SLA Breach Risk functionality and UI integration - Increased max_length for agent prompt to 5000 - Added fields parameter to list and search tickets for selective data retrieval - Updated timeout for usecase demo agent to 300 seconds - Introduced SLA Breach Risk demo with detailed prompt and ticket analysis - Added E2E tests for SLA Breach Risk demo page * feat: add incident_id field to ticket model and related components - Added incident_id to the ticket mapping in app.py. - Updated csv_data.py to include incident_id when converting CSV rows to tickets. - Modified operations.py to define incident_id as a CSV ticket field. - Enhanced the Ticket model in tickets.py to include incident_id. - Updated usecase_demo.py to accommodate changes in ticket structure. - Modified CSVTicketTable.jsx to display incident_id in the ticket table. - Updated TicketList.jsx to filter and display incident_id in the ticket list. - Enhanced TicketsWithoutAnAssignee.jsx to include incident_id in ticket operations. - Updated UsecaseDemoPage.jsx to pass matchingTickets to the render function. - Enhanced demoDefinitions.js to improve prompts for use case demos. - Added SLA Breach Overview result view in resultViews.jsx to visualize SLA status of tickets. 
Signed-off-by: Andre Bossard * refactor: clean up import statements across multiple components Signed-off-by: Andre Bossard * refactor: standardize import statement formatting in resultViews.jsx Signed-off-by: Andre Bossard * feat: add SLA breach reporting functionality and related API endpoints Signed-off-by: Andre Bossard * feat: implement SLA breach report retrieval for unassigned tickets Signed-off-by: Andre Bossard --------- Signed-off-by: Andre Bossard --- backend/agents.py | 341 ++++++++++++++---- backend/app.py | 25 +- backend/csv_data.py | 1 + backend/operations.py | 48 ++- backend/tickets.py | 169 ++++++++- backend/usecase_demo.py | 17 +- .../features/csvtickets/CSVTicketTable.jsx | 40 +- frontend/src/features/tickets/TicketList.jsx | 80 ++-- .../tickets/TicketsWithoutAnAssignee.jsx | 72 ++-- .../features/usecase-demo/UsecaseDemoPage.jsx | 46 +-- .../features/usecase-demo/demoDefinitions.js | 164 ++++++--- .../src/features/usecase-demo/resultViews.jsx | 335 ++++++++++++++++- frontend/src/services/api.js | 18 + net.drawio | 67 ++-- tests/e2e/app.spec.js | 30 ++ 15 files changed, 1185 insertions(+), 268 deletions(-) diff --git a/backend/agents.py b/backend/agents.py index 6ac2fad..39cfe23 100644 --- a/backend/agents.py +++ b/backend/agents.py @@ -32,6 +32,7 @@ # Standard library import os from datetime import datetime +from time import perf_counter from typing import Any, Literal, Optional # Load environment variables before anything else @@ -39,6 +40,34 @@ load_dotenv() +import logging + +from langchain_core.globals import set_verbose + + +def _env_flag(name: str, default: str = "false") -> bool: + """Parse environment boolean flags with common truthy values.""" + return os.getenv(name, default).strip().lower() in {"1", "true", "yes", "on"} + + +def _env_int(name: str, default: int) -> int: + """Parse integer env var with fallback.""" + raw = os.getenv(name) + if raw is None: + return default + try: + return int(raw) + except ValueError: + return default + 
+ +LANGCHAIN_VERBOSE = _env_flag("LANGCHAIN_VERBOSE", "false") +set_verbose(LANGCHAIN_VERBOSE) + +logging.basicConfig(level=logging.INFO) +logging.getLogger("langchain").setLevel(logging.INFO if LANGCHAIN_VERBOSE else logging.WARNING) +logger = logging.getLogger(__name__) + from uuid import UUID # Ensure operations register before we request LangChain tools @@ -52,6 +81,7 @@ # Third-party - FastMCP client for external MCP servers from fastmcp import Client as MCPClient +from langchain_core.callbacks import BaseCallbackHandler from langchain_core.tools import StructuredTool # Third-party - LangChain and LangGraph @@ -76,7 +106,7 @@ class AgentRequest(BaseModel): prompt: str = Field( ..., min_length=1, - max_length=2000, + max_length=5000, description="User prompt for the agent to process" ) agent_type: Literal["task_assistant"] = Field( @@ -149,6 +179,10 @@ class AgentResponse(BaseModel): OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "") # optional override +OPENAI_CALL_LOGGING_ENABLED = _env_flag("OPENAI_CALL_LOGGING_ENABLED", "true") +AGENT_EFFICIENCY_MODE = _env_flag("AGENT_EFFICIENCY_MODE", "true") +AGENT_TRACE_ENABLED = _env_flag("AGENT_TRACE_ENABLED", "false") +REACT_AGENT_RECURSION_LIMIT = max(3, _env_int("REACT_AGENT_RECURSION_LIMIT", 8)) # External MCP server URL for ticket management (hardcoded) TICKET_MCP_SERVER_URL = "https://yodrrscbpxqnslgugwow.supabase.co/functions/v1/mcp/a7f2b8c4-d3e9-4f1a-b5c6-e8d9f0123456" @@ -191,6 +225,120 @@ def _schema_to_pydantic(name: str, schema: dict) -> type[BaseModel]: return create_model(model_name, **fields) +def _extract_llm_call_metadata(response: Any) -> tuple[dict[str, Any] | None, str | None, str | None]: + """Extract token usage, model name, and finish reason from LLMResult-like objects.""" + token_usage: dict[str, Any] | None = None + model_name: str | None = None + finish_reason: str | None = None + + 
llm_output = getattr(response, "llm_output", None) + if isinstance(llm_output, dict): + maybe_usage = llm_output.get("token_usage") + if isinstance(maybe_usage, dict): + token_usage = maybe_usage + maybe_model = llm_output.get("model_name") + if isinstance(maybe_model, str): + model_name = maybe_model + + generations = getattr(response, "generations", None) or [] + if generations and generations[0]: + first_generation = generations[0][0] + generation_info = getattr(first_generation, "generation_info", None) + if isinstance(generation_info, dict): + maybe_finish = generation_info.get("finish_reason") + if isinstance(maybe_finish, str): + finish_reason = maybe_finish + + message = getattr(first_generation, "message", None) + if message is not None: + usage_metadata = getattr(message, "usage_metadata", None) + if isinstance(usage_metadata, dict): + token_usage = token_usage or usage_metadata + + response_metadata = getattr(message, "response_metadata", None) + if isinstance(response_metadata, dict): + maybe_usage = response_metadata.get("token_usage") + if isinstance(maybe_usage, dict): + token_usage = token_usage or maybe_usage + + maybe_model = response_metadata.get("model_name") + if isinstance(maybe_model, str): + model_name = model_name or maybe_model + + maybe_finish = response_metadata.get("finish_reason") + if isinstance(maybe_finish, str): + finish_reason = finish_reason or maybe_finish + + return token_usage, model_name, finish_reason + + +class OpenAICallLoggingCallback(BaseCallbackHandler): + """Log each OpenAI/LangChain LLM call with latency and token usage at INFO level.""" + + def __init__(self) -> None: + self._start_times: dict[UUID, float] = {} + + def on_llm_start( + self, + serialized: dict[str, Any], + prompts: list[str], + *, + run_id: UUID, + **kwargs: Any, + ) -> None: + self._start_times[run_id] = perf_counter() + model_name = None + if isinstance(serialized, dict): + model_name = ( + serialized.get("kwargs", {}).get("model") + if 
isinstance(serialized.get("kwargs"), dict) + else None + ) + prompt_chars = sum(len(prompt or "") for prompt in prompts) + logger.info( + "OpenAI call start run_id=%s model=%s prompts=%d chars=%d", + run_id, + model_name or OPENAI_MODEL, + len(prompts), + prompt_chars, + ) + + def on_llm_end( + self, + response: Any, + *, + run_id: UUID, + **kwargs: Any, + ) -> None: + started_at = self._start_times.pop(run_id, None) + duration_ms = int((perf_counter() - started_at) * 1000) if started_at is not None else None + token_usage, model_name, finish_reason = _extract_llm_call_metadata(response) + logger.info( + "OpenAI call end run_id=%s model=%s duration_ms=%s finish_reason=%s token_usage=%s", + run_id, + model_name or OPENAI_MODEL, + duration_ms if duration_ms is not None else "n/a", + finish_reason or "n/a", + token_usage or {}, + ) + + def on_llm_error( + self, + error: BaseException, + *, + run_id: UUID, + **kwargs: Any, + ) -> None: + started_at = self._start_times.pop(run_id, None) + duration_ms = int((perf_counter() - started_at) * 1000) if started_at is not None else None + logger.error( + "OpenAI call error run_id=%s duration_ms=%s error=%s", + run_id, + duration_ms if duration_ms is not None else "n/a", + error, + ) + + def _mcp_tool_to_langchain(mcp_client: MCPClient, tool: Any) -> StructuredTool: """ Convert MCP tool to LangChain StructuredTool. 
@@ -286,6 +434,8 @@ def __init__(self): # CSV tools only (do not expose operations or external MCP) self.tools = self._build_csv_tools() + self._system_prompt = self._build_system_prompt() + self._react_agent = create_react_agent(self.llm, self.tools) # Ticket MCP client state (unused) self._ticket_mcp_client: Optional[MCPClient] = None @@ -304,15 +454,52 @@ async def close(self): pass self._ticket_mcp_client = None + def _build_system_prompt(self) -> str: + """Build a concise system prompt optimized for low-latency tool usage.""" + efficiency_rules = ( + "- Plane möglichst einen einzelnen Tool-Aufruf und stoppe früh, sobald die Antwort klar ist.\n" + "- Nutze kleine Payloads: setze sinnvolle limits und kompakte fields.\n" + "- Fordere notes/resolution nur bei explizitem Bedarf an.\n" + ) if AGENT_EFFICIENCY_MODE else "" + return ( + "Du bist ein präziser CSV-Ticket-Assistent. Sprich Deutsch.\n\n" + "Verhalten:\n" + "- Verwende ausschließlich csv_* Tools für Ticketdaten.\n" + f"{efficiency_rules}" + "- Erfinde keine Daten; markiere fehlende Daten klar.\n" + "- Gib eine kurze Antwort und bei strukturierten Ergebnissen einen JSON-Codeblock " + "mit {\"rows\": [...]}." 
+ ) + def _build_csv_tools(self) -> list[StructuredTool]: """Build LangChain tools backed by CSVTicketService.""" import json service = get_csv_ticket_service() + compact_default_fields = [ + "id", + "summary", + "status", + "priority", + "assignee", + "assigned_group", + "created_at", + "updated_at", + ] + + def _select_fields(fields: str | None) -> list[str] | None: + if not fields: + return compact_default_fields + normalized = fields.strip() + if normalized in {"*", "all"}: + return None + parsed = [f.strip() for f in normalized.split(",") if f.strip()] + return parsed or compact_default_fields def _csv_list_tickets( status: str | None = None, assigned_group: str | None = None, has_assignee: bool | None = None, + fields: str | None = None, limit: int = 50, ) -> str: try: @@ -321,9 +508,16 @@ def _csv_list_tickets( status_enum = None tickets = service.list_tickets(status=status_enum, assigned_group=assigned_group, has_assignee=has_assignee) bounded_limit = max(1, min(limit, 100)) - return json.dumps([t.model_dump() for t in tickets[:bounded_limit]], default=str) - - def _csv_get_ticket(ticket_id: str) -> str: + items = tickets[:bounded_limit] + selected_fields = _select_fields(fields) + if selected_fields is None: + return json.dumps([t.model_dump() for t in items], default=str) + return json.dumps([ + {k: v for k, v in t.model_dump().items() if k in selected_fields} + for t in items + ], default=str) + + def _csv_get_ticket(ticket_id: str, fields: str | None = None) -> str: try: tid = UUID(ticket_id) except Exception: @@ -331,12 +525,18 @@ def _csv_get_ticket(ticket_id: str) -> str: ticket = service.get_ticket(tid) if not ticket: return json.dumps({"error": "not found"}) - return json.dumps(ticket.model_dump(), default=str) + dump = ticket.model_dump() + selected_fields = _select_fields(fields) + if selected_fields is None: + return json.dumps(dump, default=str) + return json.dumps({k: v for k, v in dump.items() if k in selected_fields}, default=str) - def 
_csv_search_tickets(query: str, limit: int = 25) -> str: + def _csv_search_tickets(query: str, fields: str | None = None, limit: int = 25) -> str: q = query.lower() tickets = service.list_tickets() + selected_fields = _select_fields(fields) matched = [] + bounded_limit = max(1, min(limit, 100)) for t in tickets: text = " ".join([ t.summary or "", @@ -348,8 +548,11 @@ def _csv_search_tickets(query: str, limit: int = 25) -> str: t.city or "", ]).lower() if q in text: - matched.append(t.model_dump()) - if len(matched) >= limit: + dump = t.model_dump() + if selected_fields is not None: + dump = {k: v for k, v in dump.items() if k in selected_fields} + matched.append(dump) + if len(matched) >= bounded_limit: break return json.dumps(matched, default=str) @@ -365,19 +568,36 @@ def _csv_ticket_fields() -> str: description=( "List tickets from CSV with optional filters: status " "(new, assigned, in_progress, pending, resolved, closed, cancelled), " - "assigned_group, has_assignee (true/false), and limit (default 50, max 100). " - "Returns JSON array." + "assigned_group, has_assignee (true/false), limit (default 50, max 100), " + "and fields (comma-separated field names). " + "Default response is compact for speed: " + "'id,summary,status,priority,assignee,assigned_group,created_at,updated_at'. " + "For deterministic analytics, prefer status/priority/date fields and avoid wide payloads. " + "Notes/resolution are excluded by default unless requested via fields. " + "Use fields='*' only when full payload is absolutely needed. Returns JSON array." ), ), StructuredTool.from_function( func=_csv_get_ticket, name="csv_get_ticket", - description="Get a ticket by UUID (id). Returns JSON object including notes/resolution.", + description=( + "Get ticket by UUID (id). Supports optional fields (comma-separated). " + "Default response is compact fields without notes/resolution for speed. " + "Prefer requesting only required fields for drill-down. 
" + "Request notes/resolution explicitly via fields, or use fields='*' for full payload." + ), ), StructuredTool.from_function( func=_csv_search_tickets, name="csv_search_tickets", - description="Search tickets by text across summary, description, notes, resolution, requester, group, city. Returns JSON array.", + description=( + "Search tickets by text across summary, description, notes, resolution, requester, group, city. " + "Supports fields (comma-separated field names). " + "Notes/resolution are excluded by default unless requested via fields. " + "Prefer low limits and compact fields for latency-sensitive runs. " + "Default response is compact fields for speed; use fields='*' only when needed. " + "Returns JSON array." + ), ), StructuredTool.from_function( func=_csv_ticket_fields, @@ -411,72 +631,45 @@ async def run_agent(self, request: AgentRequest) -> AgentResponse: ValueError: If agent execution fails """ try: - # Create ReAct agent with LangGraph tools - # The tools are the actual Python functions with @tool decorator - agent = create_react_agent(self.llm, self.tools) - - # System message to guide the agent's behavior - tool_lines = [] - for t in self.tools: - name = t.name if hasattr(t, 'name') else str(t) - desc = (t.description if hasattr(t, 'description') else "") or "" - tool_lines.append(f"- `{name}`: {desc}".strip()) - tools_md = "\n".join(tool_lines) if tool_lines else "- (none)" - - system_msg = f""" -Du bist ein freundlicher CSV-Ticket-Assistent. Sprich **Deutsch**. - -Antwortstil: -- Starte immer mit einer kurzen Begrüßung. -- Liste sofort die verfügbaren Tools (Markdown-Bullets). -- Nutze **Markdown** mit klaren Überschriften (##), Bullet-Listen und Tabellen, wenn sinnvoll. -- Für JSON-Daten nutze fenced Code-Blöcke: - ```json - {{"example": "value"}} - ``` -- Halte Antworten knapp und gut strukturiert. - -Verfügbare Tools: -{tools_md} - -Verhalten: -- Verwende ausschließlich die csv_* Tools für Ticket-Informationen. Keine Daten erfinden. 
-- Falls Daten fehlen, sage das explizit. -- Fasse Ergebnisse klar zusammen; für Listen sind kompakte Tabellen ideal. -""" - # Execute agent with user prompt - print(f"\n{'='*60}") - print(f"🤖 AGENT EXECUTION START") - print(f"{'='*60}") - print(f" Prompt: {request.prompt[:100]}{'...' if len(request.prompt) > 100 else ''}") - print(f" Agent type: {request.agent_type}") - print(f" Available tools ({len(self.tools)}):") - for t in self.tools: - name = t.name if hasattr(t, 'name') else str(t) - print(f" • {name}") - print(f"{'='*60}\n") + if AGENT_TRACE_ENABLED: + print(f"\n{'='*60}") + print(f"🤖 AGENT EXECUTION START") + print(f"{'='*60}") + print(f" Prompt: {request.prompt[:100]}{'...' if len(request.prompt) > 100 else ''}") + print(f" Agent type: {request.agent_type}") + print(f" Available tools ({len(self.tools)}):") + for t in self.tools: + name = t.name if hasattr(t, 'name') else str(t) + print(f" • {name}") + print(f"{'='*60}\n") - result = await agent.ainvoke( - {"messages": [("system", system_msg), ("user", request.prompt)]} + invoke_config: dict[str, Any] = {"recursion_limit": REACT_AGENT_RECURSION_LIMIT} + if OPENAI_CALL_LOGGING_ENABLED: + invoke_config["callbacks"] = [OpenAICallLoggingCallback()] + + result = await self._react_agent.ainvoke( + {"messages": [("system", self._system_prompt), ("user", request.prompt)]}, + config=invoke_config, ) - print(f"\n{'='*60}") - print(f"📋 AGENT EXECUTION COMPLETE") - print(f"{'='*60}") - print(f" Total messages: {len(result['messages'])}") - for i, msg in enumerate(result["messages"]): - msg_type = type(msg).__name__ - has_tool_calls = hasattr(msg, 'tool_calls') and msg.tool_calls - content_preview = "" - if hasattr(msg, 'content') and msg.content: - content_preview = str(msg.content)[:80] + "..." 
if len(str(msg.content)) > 80 else str(msg.content) - print(f" [{i}] {msg_type}: {content_preview}") - if has_tool_calls: - for tc in msg.tool_calls: - tc_name = tc.get('name', tc) if isinstance(tc, dict) else str(tc) - print(f" 🔧 Tool call: {tc_name}") - print(f"{'='*60}\n") + if AGENT_TRACE_ENABLED: + print(f"\n{'='*60}") + print(f"📋 AGENT EXECUTION COMPLETE") + print(f"{'='*60}") + print(f" Total messages: {len(result['messages'])}") + for i, msg in enumerate(result["messages"]): + msg_type = type(msg).__name__ + has_tool_calls = hasattr(msg, 'tool_calls') and msg.tool_calls + content_preview = "" + if hasattr(msg, 'content') and msg.content: + content_preview = str(msg.content)[:80] + "..." if len(str(msg.content)) > 80 else str(msg.content) + print(f" [{i}] {msg_type}: {content_preview}") + if has_tool_calls: + for tc in msg.tool_calls: + tc_name = tc.get('name', tc) if isinstance(tc, dict) else str(tc) + print(f" 🔧 Tool call: {tc_name}") + print(f"{'='*60}\n") # Extract the agent's final response final_message = result["messages"][-1] diff --git a/backend/app.py b/backend/app.py index 953f39b..2040a74 100644 --- a/backend/app.py +++ b/backend/app.py @@ -36,7 +36,6 @@ # CSV ticket service from csv_data import Ticket, get_csv_ticket_service -from usecase_demo import UsecaseDemoRunCreate, usecase_demo_run_service # FastMCP client for direct ticket MCP calls (no AI) from fastmcp import Client as MCPClient @@ -51,6 +50,7 @@ op_update_task, task_service, ) +from usecase_demo import UsecaseDemoRunCreate, usecase_demo_run_service # Ticket MCP server URL (same as in agents.py) TICKET_MCP_SERVER_URL = "https://yodrrscbpxqnslgugwow.supabase.co/functions/v1/mcp/a7f2b8c4-d3e9-4f1a-b5c6-e8d9f0123456" @@ -375,6 +375,7 @@ def _map_mcp_ticket_to_frontend(mcp_ticket: dict) -> dict: return { "id": str(mcp_ticket.get("id", "")), + "incident_id": mcp_ticket.get("incident_id"), "title": mcp_ticket.get("summary", ""), "description": mcp_ticket.get("description", ""), "status": 
status, @@ -622,6 +623,28 @@ async def get_csv_ticket_stats(): }) +@app.route("/api/csv-tickets/sla-breach", methods=["GET"]) +async def get_csv_tickets_sla_breach(): + """ + Return unassigned tickets grouped by SLA breach status (breached → at_risk), + sorted by age_hours descending within each group. + + Query params: + - unassigned_only: true/false (default: true) + - include_ok: true/false (default: false) — include non-breached tickets too + """ + from tickets import get_sla_breach_report + + unassigned_only = request.args.get("unassigned_only", "true").lower() != "false" + include_ok = request.args.get("include_ok", "false").lower() == "true" + + tickets = _csv_ticket_service.list_tickets( + has_assignee=False if unassigned_only else None, + ) + report = get_sla_breach_report(tickets, reference_time=None, include_ok=include_ok) + return jsonify(report.model_dump(mode="json")) + + @app.route("/api/health", methods=["GET"]) async def health_check(): """Health check endpoint.""" diff --git a/backend/csv_data.py b/backend/csv_data.py index 1859193..8f78e0e 100644 --- a/backend/csv_data.py +++ b/backend/csv_data.py @@ -290,6 +290,7 @@ def csv_row_to_ticket(row: CSVTicketRow) -> Ticket: return Ticket( id=ticket_id, + incident_id=row.incident_id or row.entry_id or None, summary=row.summary or "No summary", description=row.notes or row.summary or "No description", status=map_status(row.status or row.status_ppl), diff --git a/backend/operations.py b/backend/operations.py index 450c89b..f065c91 100644 --- a/backend/operations.py +++ b/backend/operations.py @@ -12,7 +12,13 @@ from api_decorators import operation from csv_data import get_csv_ticket_service from tasks import Task, TaskCreate, TaskFilter, TaskService, TaskStats, TaskUpdate -from tickets import Ticket, TicketStatus +from tickets import ( + SlaBreachReport, + Ticket, + TicketSlaInfo, + TicketStatus, + get_sla_breach_report, +) # Service instances shared across interfaces _task_service = TaskService() @@ 
-21,6 +27,7 @@ CSV_TICKET_FIELDS = [ + {"name": "incident_id", "label": "Incident ID", "type": "string"}, {"name": "id", "label": "ID", "type": "uuid"}, {"name": "summary", "label": "Summary", "type": "string"}, {"name": "status", "label": "Status", "type": "enum"}, @@ -271,6 +278,44 @@ async def op_csv_ticket_fields() -> list[dict[str, str]]: return CSV_TICKET_FIELDS +@operation( + name="csv_sla_breach_tickets", + description=( + "Return tickets at SLA breach risk from the CSV dataset. " + "By default only unassigned tickets (assigned to a group but no individual) are included. " + "Results contain pre-computed age_hours, sla_threshold_hours, and breach_status. " + "Grouped: 'breached' first, then 'at_risk'. Within each group sorted by age_hours descending. " + "The reference timestamp is the maximum created_at date found in the selected tickets " + "(not the current system time), making results deterministic for historical datasets. " + "SLA thresholds: critical=4h, high=24h, medium=72h, low=120h." + ), + http_method="GET", +) +async def op_csv_sla_breach_tickets( + unassigned_only: bool = True, + include_ok: bool = False, +) -> SlaBreachReport: + """ + Pre-compute SLA breach status for CSV tickets. + + Args: + unassigned_only: When True (default), only return tickets that are assigned + to a group but have no individual assignee — the primary use case for + proactive SLA monitoring. + include_ok: When True, also include tickets that are within their SLA window. + Default False keeps the result focused on actionable items. + + Returns: + SlaBreachReport with reference_timestamp, counts, and a sorted list of + TicketSlaInfo objects ready for display or further AI commentary. + """ + _ensure_csv_loaded() + tickets = _csv_service.list_tickets( + has_assignee=False if unassigned_only else None, + ) + return get_sla_breach_report(tickets, reference_time=None, include_ok=include_ok) + + # Export shared services for callers (REST app, CLI tools, etc.) 
task_service = _task_service csv_ticket_service = _csv_service @@ -289,5 +334,6 @@ async def op_csv_ticket_fields() -> list[dict[str, str]]: "op_csv_search_tickets", "op_csv_ticket_stats", "op_csv_ticket_fields", + "op_csv_sla_breach_tickets", "CSV_TICKET_FIELDS", ] diff --git a/backend/tickets.py b/backend/tickets.py index 9dc5edf..1a7bb38 100644 --- a/backend/tickets.py +++ b/backend/tickets.py @@ -17,7 +17,6 @@ from pydantic import BaseModel, Field - # ============================================================================ # ENUMS - Status and Priority types # ============================================================================ @@ -58,7 +57,7 @@ class WorkLogType(str, Enum): # ============================================================================ -# PRIORITY SLA DEADLINES (in minutes) +# PRIORITY SLA DEADLINES (in minutes) — kept for backwards compatibility # ============================================================================ PRIORITY_SLA_MINUTES: dict[TicketPriority, int] = { @@ -69,6 +68,27 @@ class WorkLogType(str, Enum): } +# ============================================================================ +# SLA BREACH THRESHOLDS (in hours) — used for breach status calculations +# Aligns with ITSM standard expectations and the frontend dashboard +# ============================================================================ + +SLA_THRESHOLD_HOURS: dict[TicketPriority, float] = { + TicketPriority.CRITICAL: 4.0, + TicketPriority.HIGH: 24.0, + TicketPriority.MEDIUM: 72.0, + TicketPriority.LOW: 120.0, +} + + +class SlaBreachStatus(str, Enum): + """SLA breach status for a ticket.""" + BREACHED = "breached" # age > threshold + AT_RISK = "at_risk" # age > 75% of threshold + OK = "ok" # age <= 75% of threshold + UNKNOWN = "unknown" # cannot determine + + # ============================================================================ # WORKLOG MODEL # ============================================================================ @@ -159,6 +179,7 
@@ class Ticket(BaseModel): """ # Core identifiers id: UUID = Field(..., description="Unique ticket identifier") + incident_id: Optional[str] = Field(None, description="Original incident ID (INC...)") # Summary and description summary: str = Field(..., max_length=500, description="Short issue summary") @@ -377,6 +398,141 @@ def build_reminder_candidate( ) +# ============================================================================ +# SLA BREACH MODELS — for breach-status reporting +# ============================================================================ + +class TicketSlaInfo(BaseModel): + """SLA breach information for a single ticket.""" + ticket_id: str = Field(..., description="Incident ID or UUID of the ticket") + priority: str = Field(..., description="Ticket priority (critical/high/medium/low)") + urgency: Optional[str] = Field(None, description="Urgency level") + assigned_group: Optional[str] = Field(None, description="Responsible support team") + reported_date: str = Field(..., description="Ticket creation date (ISO format)") + age_hours: float = Field(..., description="Hours elapsed since creation (1 decimal)") + sla_threshold_hours: float = Field(..., description="SLA threshold in hours for this priority") + breach_status: SlaBreachStatus = Field(..., description="Current SLA breach status") + + +class SlaBreachReport(BaseModel): + """Aggregated SLA breach report.""" + reference_timestamp: str = Field(..., description="Reference time used for age calculation") + total_breached: int = Field(..., ge=0) + total_at_risk: int = Field(..., ge=0) + tickets: list[TicketSlaInfo] = Field(default_factory=list) + + +# ============================================================================ +# SLA BREACH CALCULATIONS — Pure functions +# ============================================================================ + +def get_sla_threshold_hours(priority: TicketPriority) -> float: + """Return the SLA threshold in hours for a given priority.""" + return 
SLA_THRESHOLD_HOURS.get(priority, SLA_THRESHOLD_HOURS[TicketPriority.LOW]) + + +def calculate_ticket_sla_info( + ticket: "Ticket", + reference_time: Optional[datetime] = None, +) -> TicketSlaInfo: + """ + Compute SLA breach information for a single ticket. + + Args: + ticket: The ticket to evaluate. + reference_time: Anchor for age calculation. When None, uses the current time. + Callers managing historical datasets should pass the max date in the + dataset so results are deterministic. + + Returns: + TicketSlaInfo with pre-computed age_hours and breach_status. + """ + if reference_time is None: + reference_time = datetime.now(ticket.created_at.tzinfo) + + delta = reference_time - ticket.created_at + age_hours = round(delta.total_seconds() / 3600, 1) + + threshold = get_sla_threshold_hours(ticket.priority) + + if age_hours > threshold: + status = SlaBreachStatus.BREACHED + elif age_hours > threshold * 0.75: + status = SlaBreachStatus.AT_RISK + else: + status = SlaBreachStatus.OK + + ticket_id = ticket.incident_id or str(ticket.id) + + return TicketSlaInfo( + ticket_id=ticket_id, + priority=ticket.priority.value, + urgency=ticket.urgency, + assigned_group=ticket.assigned_group, + reported_date=ticket.created_at.isoformat(), + age_hours=age_hours, + sla_threshold_hours=threshold, + breach_status=status, + ) + + +def get_sla_breach_report( + tickets: "list[Ticket]", + reference_time: Optional[datetime] = None, + include_ok: bool = False, +) -> SlaBreachReport: + """ + Build a sorted, grouped SLA breach report from a ticket list. + + Grouping order: breached first, then at_risk, then ok (if include_ok). + Within each group, sorted by age_hours descending. + + Args: + tickets: Tickets to evaluate. + reference_time: Anchor timestamp. Uses max created_at in the list when None. + include_ok: Whether to include non-breached, non-at-risk tickets. + + Returns: + SlaBreachReport with grouped/sorted TicketSlaInfo entries. 
+ """ + if not tickets: + ref_str = (reference_time or datetime.now()).isoformat() + return SlaBreachReport(reference_timestamp=ref_str, total_breached=0, total_at_risk=0) + + if reference_time is None: + reference_time = max(t.created_at for t in tickets) + + infos = [calculate_ticket_sla_info(t, reference_time) for t in tickets] + + group_order = { + SlaBreachStatus.BREACHED: 0, + SlaBreachStatus.AT_RISK: 1, + SlaBreachStatus.OK: 2, + SlaBreachStatus.UNKNOWN: 3, + } + + filtered = [ + i for i in infos + if i.breach_status in (SlaBreachStatus.BREACHED, SlaBreachStatus.AT_RISK) + or (include_ok and i.breach_status == SlaBreachStatus.OK) + ] + + sorted_infos = sorted( + filtered, + key=lambda i: (group_order[i.breach_status], -i.age_hours), + ) + + total_breached = sum(1 for i in sorted_infos if i.breach_status == SlaBreachStatus.BREACHED) + total_at_risk = sum(1 for i in sorted_infos if i.breach_status == SlaBreachStatus.AT_RISK) + + return SlaBreachReport( + reference_timestamp=reference_time.isoformat(), + total_breached=total_breached, + total_at_risk=total_at_risk, + tickets=sorted_infos, + ) + + # ============================================================================ # EXPORTS # ============================================================================ @@ -387,8 +543,10 @@ def build_reminder_candidate( "TicketPriority", "ModificationStatus", "WorkLogType", + "SlaBreachStatus", # Constants "PRIORITY_SLA_MINUTES", + "SLA_THRESHOLD_HOURS", # Models "Ticket", "TicketWithDetails", @@ -403,6 +561,9 @@ def build_reminder_candidate( "ModificationCreate", "ModificationReview", "OverlayMetadata", + # SLA breach models + "TicketSlaInfo", + "SlaBreachReport", # Reminder models "ReminderCandidate", "ReminderRequest", @@ -414,4 +575,8 @@ def build_reminder_candidate( "is_assigned_without_assignee", "count_reminders_in_worklogs", "build_reminder_candidate", + # SLA breach calculations + "get_sla_threshold_hours", + "calculate_ticket_sla_info", + 
"get_sla_breach_report", ] diff --git a/backend/usecase_demo.py b/backend/usecase_demo.py index 75f4d61..9d16e5b 100644 --- a/backend/usecase_demo.py +++ b/backend/usecase_demo.py @@ -23,7 +23,7 @@ from agents import AgentRequest, agent_service USECASE_DEMO_AGENT_TIMEOUT_SECONDS = float( - os.getenv("USECASE_DEMO_AGENT_TIMEOUT_SECONDS", "120") + os.getenv("USECASE_DEMO_AGENT_TIMEOUT_SECONDS", "300") ) @@ -191,14 +191,13 @@ async def _execute_run(self, run_id: str) -> None: # Enforce a predictable output block for table rendering. structured_prompt = ( f"{run.prompt}\n\n" - "Zusatzformat:\n" - "- Gib zuerst eine kurze Zusammenfassung.\n" - "- Füge danach einen JSON-Codeblock mit `rows` hinzu.\n" - "- JSON-Schema:\n" - " {\"rows\": [{\"menu_point\": \"...\", \"project_name\": \"...\", " - "\"summary\": \"...\", \"agent_prompt\": \"...\", \"ticket_ids\": \"...\", " - "\"csv_evidence\": \"...\"}]}\n" - "- Falls keine sinnvollen Zeilen existieren, gib `{\"rows\": []}` zurück." + "Antwortformat:\n" + "- Führe die Anfrage mit möglichst wenigen Tool-Aufrufen aus.\n" + "- Nutze kompakte fields und sinnvolle limits.\n" + "- Fordere notes/resolution nur bei explizitem Bedarf an.\n" + "- Gib einen JSON-Codeblock mit {\"rows\": [...]} zurück.\n" + "- Falls keine sinnvollen Zeilen existieren, gib {\"rows\": []} zurück.\n" + "- Optional danach: kurze Zusammenfassung in 2-4 Stichpunkten." 
) try: diff --git a/frontend/src/features/csvtickets/CSVTicketTable.jsx b/frontend/src/features/csvtickets/CSVTicketTable.jsx index 03990d6..a14fd8e 100644 --- a/frontend/src/features/csvtickets/CSVTicketTable.jsx +++ b/frontend/src/features/csvtickets/CSVTicketTable.jsx @@ -10,24 +10,24 @@ */ import { - Badge, - Button, - Caption1, - Card, - CardHeader, - Dropdown, - makeStyles, - Option, - Spinner, - Subtitle1, - Text, - tokens, + Badge, + Button, + Caption1, + Card, + CardHeader, + Dropdown, + makeStyles, + Option, + Spinner, + Subtitle1, + Text, + tokens, } from '@fluentui/react-components' import { - ArrowDown24Regular, - ArrowSync24Regular, - ArrowUp24Regular, - Filter24Regular, + ArrowDown24Regular, + ArrowSync24Regular, + ArrowUp24Regular, + Filter24Regular, } from '@fluentui/react-icons' import { useCallback, useEffect, useMemo, useState } from 'react' import { getCSVTicketFields, getCSVTickets, getCSVTicketStats } from '../../services/api' @@ -219,7 +219,7 @@ export default function CSVTicketTable() { // Selected columns const [selectedFields, setSelectedFields] = useState([ - 'summary', 'status', 'priority', 'assignee', 'assigned_group', + 'incident_id', 'summary', 'status', 'priority', 'assignee', 'assigned_group', 'requester_name', 'city', 'created_at' ]) @@ -313,6 +313,12 @@ export default function CSVTicketTable() { const renderCell = (ticket, fieldName) => { const value = ticket[fieldName] + if (fieldName === 'incident_id') { + return value ? 
( + {value} + ) : '—' + } + if (fieldName === 'status') { return getStatusBadge(value) } diff --git a/frontend/src/features/tickets/TicketList.jsx b/frontend/src/features/tickets/TicketList.jsx index d6a5174..4593557 100644 --- a/frontend/src/features/tickets/TicketList.jsx +++ b/frontend/src/features/tickets/TicketList.jsx @@ -11,39 +11,39 @@ */ import { - Badge, - DataGrid, - DataGridBody, - DataGridCell, - DataGridHeader, - DataGridHeaderCell, - DataGridRow, - Field, - Input, - MessageBar, - MessageBarBody, - Select, - Spinner, - Tab, - TabList, - TableCellLayout, - Text, - createTableColumn, - makeStyles, - tokens + Badge, + DataGrid, + DataGridBody, + DataGridCell, + DataGridHeader, + DataGridHeaderCell, + DataGridRow, + Field, + Input, + MessageBar, + MessageBarBody, + Select, + Spinner, + Tab, + TabList, + TableCellLayout, + Text, + createTableColumn, + makeStyles, + tokens } from '@fluentui/react-components' import { - ArrowClockwise20Regular, - Building20Regular, - Calendar20Regular, - Clock20Regular, - Document20Regular, - DocumentBulletList20Regular, - Info20Regular, - Location20Regular, - Person20Regular, - Search20Regular, - Tag20Regular + ArrowClockwise20Regular, + Building20Regular, + Calendar20Regular, + Clock20Regular, + Document20Regular, + DocumentBulletList20Regular, + Info20Regular, + Location20Regular, + Person20Regular, + Search20Regular, + Tag20Regular } from '@fluentui/react-icons' import { useEffect, useState } from 'react' @@ -291,6 +291,7 @@ function filterTickets(tickets, searchTerm, priorityFilter, statusFilter) { const term = searchTerm.toLowerCase() filtered = filtered.filter( (ticket) => + ticket.incident_id?.toLowerCase().includes(term) || ticket.id?.toLowerCase().includes(term) || ticket.summary?.toLowerCase().includes(term) || ticket.requester_name?.toLowerCase().includes(term) || @@ -333,6 +334,18 @@ export default function TicketList() { // Columns for DataGrid const columns = [ + createTableColumn({ + columnId: 'incident_id', 
+ compare: (a, b) => (a.incident_id || '').localeCompare(b.incident_id || ''), + renderHeaderCell: () => 'Incident ID', + renderCell: (item) => ( + + + {item.incident_id || '—'} + + + ), + }), createTableColumn({ columnId: 'summary', compare: (a, b) => (a.summary || '').localeCompare(b.summary || ''), @@ -595,6 +608,11 @@ export default function TicketList() { {/* Detail Header */}
{detail.summary} + {detail.incident_id && ( + + {detail.incident_id} + + )}
{detail.status?.replace('_', ' ')} diff --git a/frontend/src/features/tickets/TicketsWithoutAnAssignee.jsx b/frontend/src/features/tickets/TicketsWithoutAnAssignee.jsx index 5605219..835ecf6 100644 --- a/frontend/src/features/tickets/TicketsWithoutAnAssignee.jsx +++ b/frontend/src/features/tickets/TicketsWithoutAnAssignee.jsx @@ -11,32 +11,32 @@ */ import { - Badge, - Button, - DataGrid, - DataGridBody, - DataGridCell, - DataGridHeader, - DataGridHeaderCell, - DataGridRow, - Field, - Input, - MessageBar, - MessageBarBody, - Select, - Spinner, - TableCellLayout, - Text, - createTableColumn, - makeStyles, - tokens + Badge, + Button, + DataGrid, + DataGridBody, + DataGridCell, + DataGridHeader, + DataGridHeaderCell, + DataGridRow, + Field, + Input, + MessageBar, + MessageBarBody, + Select, + Spinner, + TableCellLayout, + Text, + createTableColumn, + makeStyles, + tokens } from '@fluentui/react-components' import { - AlertUrgent20Regular, - Checkmark24Regular, - PlayCircle24Regular, - Search20Regular, - Warning24Regular, + AlertUrgent20Regular, + Checkmark24Regular, + PlayCircle24Regular, + Search20Regular, + Warning24Regular, } from '@fluentui/react-icons' import { useState } from 'react' import { getQATickets } from '../../services/api' @@ -157,7 +157,7 @@ function filterTickets(tickets, searchTerm, priorityFilter) { const term = searchTerm.toLowerCase() filtered = filtered.filter( (ticket) => - ticket.id.toLowerCase().includes(term) || + (ticket.incident_id || ticket.id).toLowerCase().includes(term) || ticket.title.toLowerCase().includes(term) || ticket.description.toLowerCase().includes(term) ) @@ -194,12 +194,14 @@ export default function TicketsWithoutAnAssignee() { // Columns for DataGrid const columns = [ createTableColumn({ - columnId: 'id', - compare: (a, b) => a.id.localeCompare(b.id), - renderHeaderCell: () => 'ID', + columnId: 'incident_id', + compare: (a, b) => (a.incident_id || a.id).localeCompare(b.incident_id || b.id), + renderHeaderCell: () => 
'Incident ID', renderCell: (item) => ( - {item.id} + + {item.incident_id || item.id} + ), }), @@ -246,7 +248,7 @@ export default function TicketsWithoutAnAssignee() { const handleReminder = () => { if (selectedTicket) { - setReminderMessage(`Erinnerung für Ticket ${selectedTicket.id} wurde gesendet.`) + setReminderMessage(`Erinnerung für Ticket ${selectedTicket.incident_id || selectedTicket.id} wurde gesendet.`) // TODO: Backend integration - send reminder API call } } @@ -268,7 +270,7 @@ export default function TicketsWithoutAnAssignee() { const handleMarkAsGood = () => { if (selectedTicket) { setTicketDecisions(prev => ({ ...prev, [selectedTicket.id]: 'GOOD' })) - setReminderMessage(`Ticket ${selectedTicket.id} als GOOD markiert.`) + setReminderMessage(`Ticket ${selectedTicket.incident_id || selectedTicket.id} als GOOD markiert.`) // TODO: Backend integration - update ticket status } } @@ -276,7 +278,7 @@ export default function TicketsWithoutAnAssignee() { const handleMarkAsEscalate = () => { if (selectedTicket) { setTicketDecisions(prev => ({ ...prev, [selectedTicket.id]: 'ESCALATE' })) - setReminderMessage(`Ticket ${selectedTicket.id} zur Eskalation markiert.`) + setReminderMessage(`Ticket ${selectedTicket.incident_id || selectedTicket.id} zur Eskalation markiert.`) // TODO: Backend integration - escalate ticket } } @@ -387,8 +389,10 @@ export default function TicketsWithoutAnAssignee() {
- Ticket ID - {selectedTicket.id} + Incident ID + + {selectedTicket.incident_id || selectedTicket.id} +
diff --git a/frontend/src/features/usecase-demo/UsecaseDemoPage.jsx b/frontend/src/features/usecase-demo/UsecaseDemoPage.jsx index 8d378c9..02be053 100644 --- a/frontend/src/features/usecase-demo/UsecaseDemoPage.jsx +++ b/frontend/src/features/usecase-demo/UsecaseDemoPage.jsx @@ -1,34 +1,34 @@ import { - Badge, - Button, - Card, - CardHeader, - Field, - Spinner, - Subtitle1, - Text, - Textarea, - makeStyles, - tokens, + Badge, + Button, + Card, + CardHeader, + Field, + Spinner, + Subtitle1, + Text, + Textarea, + makeStyles, + tokens, } from '@fluentui/react-components' import { - ArrowSync24Regular, - Bot24Regular, - Play24Regular, + ArrowSync24Regular, + Bot24Regular, + Play24Regular, } from '@fluentui/react-icons' import { useCallback, useEffect, useMemo, useState } from 'react' import { - createUsecaseDemoAgentRun, - getCSVTicket, - getUsecaseDemoAgentRun, - listUsecaseDemoAgentRuns, + createUsecaseDemoAgentRun, + getCSVTicket, + getUsecaseDemoAgentRun, + listUsecaseDemoAgentRuns, } from '../../services/api' import { RESULT_VIEW_REGISTRY } from './resultViews' import { - extractTicketIdsFromRows, - formatDateTime, - sanitizeMarkdownForDisplay, - upsertRun, + extractTicketIdsFromRows, + formatDateTime, + sanitizeMarkdownForDisplay, + upsertRun, } from './usecaseDemoUtils' const STATUS_COLORS = { @@ -427,7 +427,7 @@ export default function UsecaseDemoPage({ definition }) { {config.description && ( {config.description} )} - {config.render({ run: currentRun, markdown: visibleResultMarkdown, styles })} + {config.render({ run: currentRun, markdown: visibleResultMarkdown, styles, matchingTickets, isLoadingTickets })}
)) )} diff --git a/frontend/src/features/usecase-demo/demoDefinitions.js b/frontend/src/features/usecase-demo/demoDefinitions.js index c5b6d5d..d6dac54 100644 --- a/frontend/src/features/usecase-demo/demoDefinitions.js +++ b/frontend/src/features/usecase-demo/demoDefinitions.js @@ -1,8 +1,28 @@ -const VPN_DEFAULT_PROMPT = `Find VPN issues where you think it's more a skill issue than a technical issue.` +const VPN_DEFAULT_PROMPT = `Find VPN issues where you think it's more a skill issue than a technical issue. + +For speed: +- First call csv_search_tickets with fields="id,summary,status,priority,assignee,assigned_group,created_at" and limit=20. +- Use csv_get_ticket only for the top 3-5 most relevant IDs when deeper context is required. +- Stop after the first sufficient result set; avoid extra tool loops. +- Do not request notes or resolution by default; only request them when explicitly needed for evidence.`; const OPS_DEFAULT_PROMPT = `Analysiere Tickets zu "Outlook" oder "E-Mail" und erstelle einen einzigen Operations-Usecase. +Für schnelle Ausführung: +- Nutze zuerst csv_search_tickets mit fields="id,summary,status,priority,assigned_group,created_at" und limit=20. +- Nutze csv_get_ticket nur für wenige ausgewählte Ticket-IDs, wenn Details nötig sind. +- Beende nach dem ersten ausreichenden Datensatz und vermeide zusätzliche Tool-Schleifen. +- Fordere notes/resolution nicht standardmäßig an, nur wenn sie für die Aussage zwingend notwendig sind. Liefere nur eine kurze, handlungsorientierte Zusammenfassung mit Prioritäten und nächstem Schritt. -Nutze ausschließlich CSV-Daten und nenne die verwendeten Ticket-IDs in Fließtext.` +Nutze ausschließlich CSV-Daten und nenne die verwendeten Ticket-IDs in Fließtext.`; + +const SLA_BREACH_DEFAULT_PROMPT = `Call csv_sla_breach_tickets with default parameters (unassigned_only=true, include_ok=false). + +Using the returned report, write ONLY a short markdown summary (max 200 words): +1. State the reference_timestamp used +2. 
Group ticket counts by breach_status and assigned_group +3. Recommend actions for the most critical breaches + +Do NOT output a JSON block — the frontend fetches and renders the ticket table directly from the API.`; /** * Add new demos here to create additional pages without duplicating UI logic. @@ -10,74 +30,118 @@ Nutze ausschließlich CSV-Daten und nenne die verwendeten Ticket-IDs in Fließte */ export const USECASE_DEMO_DEFINITIONS = [ { - id: 'usecase-demo-1', - route: '/usecase_demo_1', - tabValue: 'usecase-demo', - tabLabel: 'Usecase Demo', - tabTestId: 'tab-usecase-demo', - testIdPrefix: 'usecase-demo', - title: 'Usecase Demo Description', - menuPointBadge: '1 menu point', + id: "usecase-demo-1", + route: "/usecase_demo_1", + tabValue: "usecase-demo", + tabLabel: "Usecase Demo", + tabTestId: "tab-usecase-demo", + testIdPrefix: "usecase-demo", + title: "Usecase Demo Description", + menuPointBadge: "1 menu point", pageDescription: - 'This page documents one usecase demo menu point: summary first, then editable agent prompt, then background execution and results.', - promptLabel: 'Agent Prompt', - promptDescription: 'Edit the prompt, then start the agent run in the background.', + "This page documents one usecase demo menu point: summary first, then editable agent prompt, then background execution and results.", + promptLabel: "Agent Prompt", + promptDescription: + "Edit the prompt, then start the agent run in the background.", defaultPrompt: VPN_DEFAULT_PROMPT, runHistoryLimit: 25, pollIntervalMs: 2000, - resultViews: ['table', 'markdown'], - resultSectionTitle: 'Agent Results', + resultViews: ["table", "markdown"], + resultSectionTitle: "Agent Results", resultSectionDescription: - 'Configured result views are rendered below based on this demo definition.', - ticketIdFields: ['ticket_ids', 'ticket_id', 'ticketIds'], + "Configured result views are rendered below based on this demo definition.", + ticketIdFields: ["ticket_ids", "ticket_id", "ticketIds"], 
matchingTickets: { enabled: true, - title: 'Matching Tickets', + title: "Matching Tickets", description: - 'Ticket IDs from the agent result are resolved against CSV data. Click a ticket to inspect details.', + "Ticket IDs from the agent result are resolved against CSV data. Click a ticket to inspect details.", fields: [ - 'id', - 'summary', - 'status', - 'priority', - 'assignee', - 'assigned_group', - 'requester_name', - 'city', - 'service', - 'description', - 'notes', - 'resolution', - 'created_at', - 'updated_at', + "id", + "summary", + "status", + "priority", + "assignee", + "assigned_group", + "requester_name", + "city", + "service", + "description", + "notes", + "resolution", + "created_at", + "updated_at", ], }, }, { - id: 'usecase-demo-ops', - route: '/usecase_demo_ops', - tabValue: 'usecase-demo-ops', - tabLabel: 'Ops Demo', - tabTestId: 'tab-usecase-demo-ops', - testIdPrefix: 'ops-demo', - title: 'Operations Usecase Demo', - menuPointBadge: '1 menu point', + id: "usecase-demo-ops", + route: "/usecase_demo_ops", + tabValue: "usecase-demo-ops", + tabLabel: "Ops Demo", + tabTestId: "tab-usecase-demo-ops", + testIdPrefix: "ops-demo", + title: "Operations Usecase Demo", + menuPointBadge: "1 menu point", pageDescription: - 'This demo focuses on operational triage outcomes with a concise narrative output.', - promptLabel: 'Operations Prompt', - promptDescription: 'Adjust the ops prompt and run the agent in background.', + "This demo focuses on operational triage outcomes with a concise narrative output.", + promptLabel: "Operations Prompt", + promptDescription: "Adjust the ops prompt and run the agent in background.", defaultPrompt: OPS_DEFAULT_PROMPT, runHistoryLimit: 25, pollIntervalMs: 2000, - resultViews: ['markdown'], - resultSectionTitle: 'Operations Result', + resultViews: ["markdown"], + resultSectionTitle: "Operations Result", resultSectionDescription: - 'This demo is configured for concise narrative output only.', - ticketIdFields: ['ticket_ids', 
'ticket_id', 'ticketIds'], + "This demo is configured for concise narrative output only.", + ticketIdFields: ["ticket_ids", "ticket_id", "ticketIds"], matchingTickets: { enabled: false, }, }, -] + { + id: "usecase-demo-sla-breach", + route: "/usecase_demo_sla_breach", + tabValue: "usecase-demo-sla-breach", + tabLabel: "SLA Breach Risk", + tabTestId: "tab-usecase-demo-sla-breach", + testIdPrefix: "sla-breach", + title: "SLA Breach Risk", + menuPointBadge: "1 menu point", + pageDescription: + "Identifies tickets assigned to a support group but with no individual assignee, that are approaching or have exceeded priority-based SLA thresholds. Helps teams prioritize pickup before service level agreements are breached.", + promptLabel: "SLA Breach Prompt", + promptDescription: + "Edit thresholds or filters in the prompt, then run the agent to scan for at-risk tickets.", + defaultPrompt: SLA_BREACH_DEFAULT_PROMPT, + runHistoryLimit: 25, + pollIntervalMs: 2000, + resultViews: ["sla-breach", "markdown"], + resultSectionTitle: "SLA Breach Results", + resultSectionDescription: + "Tickets at risk or already past their SLA threshold, sorted by severity.", + ticketIdFields: ["ticket_ids", "ticket_id", "ticketIds"], + // NOTE(review): tickets are already shown inline in the sla-breach result view, yet the separate matching-tickets card below is still enabled; confirm whether it should be disabled. + matchingTickets: { + enabled: true, + title: "Affected Tickets (Group-Assigned, No Individual Assignee)", + description: + 'These tickets are routed to a support group but no individual has picked them up.
The "Assigned Group" column shows the responsible team; "Assignee" is empty for all.', + fields: [ + "id", + "summary", + "assigned_group", + "assignee", + "status", + "priority", + "urgency", + "reported_date", + "last_modified_date", + "requester_name", + "service", + ], + }, + }, +]; -export const DEFAULT_USECASE_DEMO_DEFINITION = USECASE_DEMO_DEFINITIONS[0] +export const DEFAULT_USECASE_DEMO_DEFINITION = USECASE_DEMO_DEFINITIONS[0]; diff --git a/frontend/src/features/usecase-demo/resultViews.jsx b/frontend/src/features/usecase-demo/resultViews.jsx index 6f4c7f3..eedd0bf 100644 --- a/frontend/src/features/usecase-demo/resultViews.jsx +++ b/frontend/src/features/usecase-demo/resultViews.jsx @@ -1,6 +1,26 @@ -import { Text } from '@fluentui/react-components' +import { + Badge, + Button, + Checkbox, + Spinner, + Text, + ToolbarButton, + Tooltip, + makeStyles, + tokens +} from '@fluentui/react-components' +import { + ArrowUp24Regular, + CheckmarkCircle24Regular, + DismissCircle24Regular, + Mail24Regular, + SelectAllOn24Regular, + Warning24Regular, +} from '@fluentui/react-icons' +import { useCallback, useEffect, useMemo, useState } from 'react' import ReactMarkdown from 'react-markdown' import remarkGfm from 'remark-gfm' +import { getSlaBreach } from '../../services/api' function ResultTableView({ run, styles }) { const hasRows = Boolean(run?.result_rows?.length && run?.result_columns?.length) @@ -48,6 +68,314 @@ function ResultMarkdownView({ markdown, styles }) { ) } +const BREACH_COLORS = { + breached: { bg: '#fde7e9', border: '#d13438', text: '#a80000', label: 'Breached', icon: DismissCircle24Regular }, + at_risk: { bg: '#fff4ce', border: '#f7630c', text: '#bc4b00', label: 'At Risk', icon: Warning24Regular }, + ok: { bg: '#dff6dd', border: '#107c10', text: '#0b6a0b', label: 'OK', icon: CheckmarkCircle24Regular }, + unknown: { bg: '#f5f5f5', border: '#8a8886', text: '#605e5c', label: '—', icon: null }, +} + +const PRIORITY_COLORS = { + critical: { bg: '#fde7e9', 
color: '#a80000' }, + high: { bg: '#fff4ce', color: '#bc4b00' }, + medium: { bg: '#e8f4fd', color: '#0063b1' }, + low: { bg: '#f5f5f5', color: '#605e5c' }, +} + +function formatDate(iso) { + if (!iso) return '—' + const d = new Date(iso) + if (isNaN(d)) return iso + return d.toLocaleString(undefined, { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' }) +} + +function AgeBar({ ageHours, thresholdHours }) { + const pct = Math.min((ageHours / thresholdHours) * 100, 100) + const color = pct >= 100 ? '#d13438' : pct >= 75 ? '#f7630c' : '#107c10' + return ( +
+ {ageHours}h / {thresholdHours}h +
+
+
+
+ ) +} + +const useSlaStyles = makeStyles({ + wrapper: { + display: 'grid', + gap: tokens.spacingVerticalM, + }, + statsRow: { + display: 'grid', + gridTemplateColumns: 'repeat(auto-fit, minmax(140px, 1fr))', + gap: tokens.spacingHorizontalM, + }, + statCard: { + display: 'flex', + flexDirection: 'column', + alignItems: 'center', + justifyContent: 'center', + padding: `${tokens.spacingVerticalM} ${tokens.spacingHorizontalM}`, + borderRadius: tokens.borderRadiusMedium, + border: `1px solid ${tokens.colorNeutralStroke2}`, + gap: tokens.spacingVerticalXS, + textAlign: 'center', + }, + statNum: { + fontSize: '2rem', + fontWeight: tokens.fontWeightSemibold, + lineHeight: 1, + }, + refTime: { + color: tokens.colorNeutralForeground3, + fontFamily: tokens.fontFamilyMonospace, + }, + actionBar: { + display: 'flex', + gap: tokens.spacingHorizontalM, + alignItems: 'center', + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + backgroundColor: tokens.colorNeutralBackground3, + borderRadius: tokens.borderRadiusMedium, + flexWrap: 'wrap', + }, + selectionInfo: { + marginLeft: 'auto', + color: tokens.colorNeutralForeground3, + }, + tableWrap: { + overflowX: 'auto', + border: `1px solid ${tokens.colorNeutralStroke2}`, + borderRadius: tokens.borderRadiusMedium, + }, + table: { + width: '100%', + borderCollapse: 'collapse', + }, + th: { + textAlign: 'left', + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + borderBottom: `2px solid ${tokens.colorNeutralStroke2}`, + backgroundColor: tokens.colorNeutralBackground3, + whiteSpace: 'nowrap', + fontSize: tokens.fontSizeBase200, + fontWeight: tokens.fontWeightSemibold, + color: tokens.colorNeutralForeground2, + textTransform: 'uppercase', + letterSpacing: '0.04em', + }, + td: { + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + borderBottom: `1px solid ${tokens.colorNeutralStroke1}`, + verticalAlign: 'middle', + }, + groupHeader: { + padding: `${tokens.spacingVerticalXS} 
${tokens.spacingHorizontalM}`, + fontWeight: tokens.fontWeightSemibold, + fontSize: tokens.fontSizeBase200, + textTransform: 'uppercase', + letterSpacing: '0.06em', + }, +}) + +const SLA_COLUMNS = [ + { key: 'ticket_id', label: 'Ticket' }, + { key: 'priority', label: 'Priority' }, + { key: 'urgency', label: 'Urgency' }, + { key: 'assigned_group', label: 'Assigned Group' }, + { key: 'reported_date', label: 'Reported' }, + { key: 'age_progress', label: 'Age vs SLA' }, +] + +function SlaBreachResultView() { + const slaStyles = useSlaStyles() + const [selected, setSelected] = useState(new Set()) + const [report, setReport] = useState(null) + const [loading, setLoading] = useState(true) + const [fetchError, setFetchError] = useState(null) + + useEffect(() => { + let cancelled = false + setLoading(true) + getSlaBreach() + .then((data) => { if (!cancelled) { setReport(data); setLoading(false) } }) + .catch((err) => { if (!cancelled) { setFetchError(err.message); setLoading(false) } }) + return () => { cancelled = true } + }, []) + + const rows = report?.tickets ?? [] + // server already returns rows sorted: breached → at_risk, age_hours desc + + const allIds = useMemo(() => rows.map((r) => r.ticket_id), [rows]) + const allSelected = rows.length > 0 && selected.size === rows.length + + const toggleAll = useCallback(() => { + setSelected((prev) => prev.size === allIds.length ? new Set() : new Set(allIds)) + }, [allIds]) + + const toggleOne = useCallback((id) => { + setSelected((prev) => { + const next = new Set(prev) + next.has(id) ? next.delete(id) : next.add(id) + return next + }) + }, []) + + if (loading) return + if (fetchError) return Failed to load: {fetchError} + if (!rows.length) return No at-risk or breached tickets found. + + const selectedCount = selected.size + const breachedCount = report?.total_breached ?? 0 + const atRiskCount = report?.total_at_risk ?? 
0 + const totalCount = rows.length + + // Build row groups for divider rendering + const groups = [] + let lastStatus = null + for (const row of rows) { + if (row.breach_status !== lastStatus) { + groups.push({ type: 'header', status: row.breach_status }) + lastStatus = row.breach_status + } + groups.push({ type: 'row', row }) + } + + return ( +
+ {/* Stats cards */} +
+
+ + {breachedCount} + Breached +
+
+ + {atRiskCount} + At Risk +
+
+ Total monitored + {totalCount} + ref: {formatDate(report?.reference_timestamp)} +
+
+ + {/* Action bar */} +
+ + + } + disabled={selectedCount === 0} + onClick={() => alert(`Send Reminder for ${selectedCount} ticket(s):\n${[...selected].join(', ')}`)} + > + Send Reminder + + + + } + disabled={selectedCount === 0} + onClick={() => alert(`Escalate ${selectedCount} ticket(s):\n${[...selected].join(', ')}`)} + > + Escalate + + + {selectedCount > 0 && ( + + {selectedCount} of {rows.length} selected + + )} +
+ + {/* Table */} +
+ + + + + + {SLA_COLUMNS.map(({ key, label }) => ( + + ))} + + + + {groups.map((item, i) => { + if (item.type === 'header') { + const colors = BREACH_COLORS[item.status] || BREACH_COLORS.unknown + const Icon = colors.icon + return ( + + + + ) + } + + const { row } = item + const colors = BREACH_COLORS[row.breach_status] || BREACH_COLORS.unknown + const isChecked = selected.has(row.ticket_id) + const priorityStyle = PRIORITY_COLORS[row.priority] || {} + + return ( + toggleOne(row.ticket_id)} + onKeyDown={(e) => e.key === ' ' && toggleOne(row.ticket_id)} + style={{ borderLeft: `4px solid ${colors.border}`, cursor: 'pointer', backgroundColor: isChecked ? colors.bg : undefined }} + > + + + {SLA_COLUMNS.map(({ key }) => ( + + ))} + + ) + })} + +
+ 0 ? 'mixed' : false} + onChange={toggleAll} + /> + Status{label}
+ + {Icon && } + {colors.label} + +
e.stopPropagation()}> + toggleOne(row.ticket_id)} /> + + + {colors.label} + + + {key === 'priority' ? ( + + {row.priority} + + ) : key === 'reported_date' ? ( + formatDate(row.reported_date) + ) : key === 'age_progress' ? ( + + ) : ( + String(row[key] ?? '—') + )} +
+
+
+ ) +} + export const RESULT_VIEW_REGISTRY = { table: { title: 'Structured Table', @@ -59,4 +387,9 @@ export const RESULT_VIEW_REGISTRY = { description: 'Human-readable summary from the run output.', render: (props) => , }, + 'sla-breach': { + title: 'SLA Breach Overview', + description: 'Unassigned tickets color-coded by SLA status. Select tickets to send reminders or escalate.', + render: (props) => , + }, } diff --git a/frontend/src/services/api.js b/frontend/src/services/api.js index 5aa2b62..ba265fd 100644 --- a/frontend/src/services/api.js +++ b/frontend/src/services/api.js @@ -260,3 +260,21 @@ export async function getUsecaseDemoAgentRun(runId) { export async function listUsecaseDemoAgentRuns(limit = 20) { return fetchJSON(`${API_BASE_URL}/usecase-demo/agent-runs?limit=${limit}`); } + +/** + * Get pre-computed SLA breach report for unassigned tickets. + * @param {Object} options + * @param {boolean} [options.unassignedOnly=true] - Only unassigned tickets + * @param {boolean} [options.includeOk=false] - Include non-breached tickets + * @returns {Promise<{reference_timestamp: string, total_breached: number, total_at_risk: number, tickets: Array}>} + */ +export async function getSlaBreach({ + unassignedOnly = true, + includeOk = false, +} = {}) { + const params = new URLSearchParams({ + unassigned_only: unassignedOnly.toString(), + include_ok: includeOk.toString(), + }); + return fetchJSON(`${API_BASE_URL}/csv-tickets/sla-breach?${params}`); +} diff --git a/net.drawio b/net.drawio index 460568d..06d6cca 100644 --- a/net.drawio +++ b/net.drawio @@ -1,86 +1,103 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/e2e/app.spec.js b/tests/e2e/app.spec.js index 6675464..972c59a 100644 --- a/tests/e2e/app.spec.js +++ b/tests/e2e/app.spec.js @@ -38,6 +38,13 @@ test.describe("App shell", () => { ); await expect(page.getByText("Operations 
Usecase Demo")).toBeVisible(); + await page.getByTestId("tab-usecase-demo-sla-breach").click(); + await expect(page.getByTestId("tab-usecase-demo-sla-breach")).toHaveAttribute( + "aria-selected", + "true" + ); + await expect(page.getByTestId("sla-breach-prompt")).toBeVisible(); + await page.getByTestId("tab-fields").click(); await expect(page.getByTestId("tab-fields")).toHaveAttribute( "aria-selected", @@ -79,6 +86,12 @@ test.describe("App shell", () => { "true" ); + await visit(page, "/usecase_demo_sla_breach"); + await expect(page.getByTestId("tab-usecase-demo-sla-breach")).toHaveAttribute( + "aria-selected", + "true" + ); + await visit(page, "/agent"); await expect(page.getByTestId("tab-agent")).toHaveAttribute( "aria-selected", @@ -271,6 +284,23 @@ test.describe("Ops usecase demo page", () => { }); }); +test.describe("SLA Breach Risk demo page", () => { + test("uses config-specific prompt and table+markdown views", async ({ page }) => { + await visit(page, "/usecase_demo_sla_breach"); + + const prompt = page.getByTestId("sla-breach-prompt"); + const startButton = page.getByTestId("sla-breach-start-agent"); + + await expect(prompt).toBeVisible(); + await expect(prompt).toContainText("assignee"); + await expect(startButton).toBeEnabled(); + + await expect(page.getByText("SLA Breach Results")).toBeVisible(); + await expect(page.getByText("No result available yet.")).toBeVisible(); + await expect(page.getByText("Group-Assigned, No Individual Assignee")).toBeVisible(); + }); +}); + test.describe("Agent page", () => { test("has input and send button state behavior", async ({ page }) => { await visit(page, "/agent"); From 8738e2cdb4840007c69e4d1a5201b9718f9482bb Mon Sep 17 00:00:00 2001 From: Andre Bossard Date: Wed, 18 Feb 2026 15:17:37 +0100 Subject: [PATCH 2/8] fix: update API proxy target from localhost to 127.0.0.1 in vite.config.js (#16) Co-authored-by: luca Spring --- frontend/vite.config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/frontend/vite.config.js b/frontend/vite.config.js index ed9b525..a85e1a5 100644 --- a/frontend/vite.config.js +++ b/frontend/vite.config.js @@ -8,7 +8,7 @@ export default defineConfig({ port: 3001, proxy: { "/api": { - target: "http://localhost:5001", + target: "http://127.0.0.1:5001", changeOrigin: true, }, }, From f47e48d4117dd7ee58f92566b4920f2dbe30034d Mon Sep 17 00:00:00 2001 From: Andre Bossard Date: Wed, 25 Feb 2026 13:10:11 +0100 Subject: [PATCH 3/8] Agent fabric (#17) * feat: Implement Tool Registry and Workbench Integration - Added ToolRegistry class to manage LangChain StructuredTool instances. - Created workbench_integration.py to wire tools into the Agent Workbench. - Developed WorkbenchPage component for agent management in the frontend. - Implemented backend tests for tool registration and agent operations. - Added end-to-end tests for agent creation and deletion in the UI. Signed-off-by: Andre Bossard * feat: Refactor Agent Workbench to Agent Fabric and enhance tool metadata handling Signed-off-by: Andre Bossard * feat: Add required input handling to agent definitions and update UI components Signed-off-by: Andre Bossard * feat: Enhance Markdown output handling in agent workflow and update UI components Signed-off-by: Andre Bossard --------- Signed-off-by: Andre Bossard --- .gitignore | 1 + backend/agent_workbench/__init__.py | 50 ++ backend/agent_workbench/evaluator.py | 128 ++++ backend/agent_workbench/models.py | 303 ++++++++ backend/agent_workbench/service.py | 480 +++++++++++++ backend/agent_workbench/tool_registry.py | 69 ++ backend/app.py | 188 ++++- backend/operations.py | 172 +++++ backend/test_agents.py | 118 ---- backend/test_mcp_client.py | 140 ---- backend/tests/__init__.py | 0 backend/tests/conftest.py | 6 + backend/tests/test_agents.py | 20 + backend/{ => tests}/test_tickets.py | 143 ++-- backend/{ => tests}/test_usecase_demo.py | 15 +- .../tests/test_workbench_integration_e2e.py | 216 ++++++ backend/usecase_demo.py | 41 +- 
backend/workbench_integration.py | 63 ++ explain.drawio | 48 +- frontend/src/App.jsx | 4 + .../features/usecase-demo/demoDefinitions.js | 16 +- .../src/features/usecase-demo/resultViews.jsx | 35 +- .../src/features/workbench/WorkbenchPage.jsx | 650 ++++++++++++++++++ frontend/src/services/api.js | 42 ++ tests/e2e/workbench.spec.js | 166 +++++ 25 files changed, 2730 insertions(+), 384 deletions(-) create mode 100644 backend/agent_workbench/__init__.py create mode 100644 backend/agent_workbench/evaluator.py create mode 100644 backend/agent_workbench/models.py create mode 100644 backend/agent_workbench/service.py create mode 100644 backend/agent_workbench/tool_registry.py delete mode 100644 backend/test_agents.py delete mode 100644 backend/test_mcp_client.py create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/conftest.py create mode 100644 backend/tests/test_agents.py rename backend/{ => tests}/test_tickets.py (72%) rename backend/{ => tests}/test_usecase_demo.py (92%) create mode 100644 backend/tests/test_workbench_integration_e2e.py create mode 100644 backend/workbench_integration.py create mode 100644 frontend/src/features/workbench/WorkbenchPage.jsx create mode 100644 tests/e2e/workbench.spec.js diff --git a/.gitignore b/.gitignore index f68dbae..46795ca 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,4 @@ Thumbs.db logs/ *.db csv/*.csv +screenshots/ diff --git a/backend/agent_workbench/__init__.py b/backend/agent_workbench/__init__.py new file mode 100644 index 0000000..80e33cb --- /dev/null +++ b/backend/agent_workbench/__init__.py @@ -0,0 +1,50 @@ +""" +Agent Workbench - Public API + +Import everything you need from this package: + + from agent_workbench import ( + WorkbenchService, + ToolRegistry, + AgentDefinitionCreate, + AgentRunCreate, + SuccessCriteria, + CriteriaType, + ) +""" + +from .evaluator import compute_score, evaluate_run +from .models import ( + AgentDefinition, + AgentDefinitionCreate, + AgentDefinitionUpdate, + 
AgentEvaluation, + AgentRun, + AgentRunCreate, + CriteriaResult, + CriteriaType, + RunStatus, + SuccessCriteria, +) +from .service import WorkbenchService +from .tool_registry import ToolRegistry + +__all__ = [ + # Service + "WorkbenchService", + "ToolRegistry", + # Models + "AgentDefinition", + "AgentDefinitionCreate", + "AgentDefinitionUpdate", + "AgentEvaluation", + "AgentRun", + "AgentRunCreate", + "CriteriaResult", + "CriteriaType", + "RunStatus", + "SuccessCriteria", + # Evaluator helpers (useful for tests) + "compute_score", + "evaluate_run", +] diff --git a/backend/agent_workbench/evaluator.py b/backend/agent_workbench/evaluator.py new file mode 100644 index 0000000..65e0ef2 --- /dev/null +++ b/backend/agent_workbench/evaluator.py @@ -0,0 +1,128 @@ +""" +Agent Workbench - Evaluator + +Applies SuccessCriteria to a completed AgentRun and produces CriteriaResult list. + +Supported criteria types: + no_error - run completed without an error + tool_called - a specific tool name appears in tools_used + output_contains - final output contains the substring (case-insensitive) + llm_judge - the LLM grades the output via a judge prompt (requires LLM configuration) +""" + +from typing import Any + +from .models import AgentRun, CriteriaResult, CriteriaType, SuccessCriteria + +# ============================================================================ +# ASYNC LLM JUDGE (I/O) +# ============================================================================ + +async def _eval_llm_judge( + run: AgentRun, + criteria: SuccessCriteria, + llm: Any, # ChatOpenAI or compatible +) -> CriteriaResult: + """ + Ask an LLM to evaluate whether the run output satisfies the judge prompt. + + The criteria.value is a judge prompt that is appended with the run's + output. The LLM must respond with PASS or FAIL (case-insensitive) as + the first word. 
+ """ + if llm is None: + raise ValueError("llm_judge criteria require an LLM instance") + + judge_prompt = ( + f"{criteria.value}\n\n" + f"--- Agent Output ---\n{run.output or '(empty)'}\n\n" + "Reply with a single word: PASS or FAIL, followed by an optional explanation." + ) + + try: + from langchain_core.messages import HumanMessage + response = await llm.ainvoke([HumanMessage(content=judge_prompt)]) + answer = (response.content or "").strip() + passed = answer.upper().startswith("PASS") + return CriteriaResult( + criteria=criteria, + passed=passed, + detail=answer[:500], + ) + except Exception as exc: + return CriteriaResult( + criteria=criteria, + passed=False, + detail=f"LLM judge error: {exc}", + ) + + +# ============================================================================ +# PUBLIC EVALUATOR +# ============================================================================ + +async def evaluate_run( + run: AgentRun, + criteria_list: list[SuccessCriteria], + llm: Any = None, +) -> list[CriteriaResult]: + """ + Evaluate all criteria for a run. + + Args: + run: Completed AgentRun (status = completed | failed). + criteria_list: List of SuccessCriteria from the AgentDefinition. + llm: Optional LLM instance; required when criteria include llm_judge. + + Returns: + Ordered list of CriteriaResult, one per criterion. 
+ """ + results: list[CriteriaResult] = [] + + for criteria in criteria_list: + if criteria.type == CriteriaType.NO_ERROR: + passed = run.error is None and run.status == "completed" + results.append( + CriteriaResult( + criteria=criteria, + passed=passed, + detail="" if passed else f"Run error: {run.error or 'unexpected status ' + run.status}", + ) + ) + elif criteria.type == CriteriaType.TOOL_CALLED: + tool_name = criteria.value.strip() + results.append( + CriteriaResult( + criteria=criteria, + passed=tool_name in run.tools_used, + detail=f"tools_used={run.tools_used}", + ) + ) + elif criteria.type == CriteriaType.OUTPUT_CONTAINS: + needle = criteria.value + haystack = (run.output or "").lower() + results.append( + CriteriaResult( + criteria=criteria, + passed=needle.lower() in haystack, + detail=f"searched for '{needle}' in output ({len(run.output or '')} chars)", + ) + ) + elif criteria.type == CriteriaType.LLM_JUDGE: + results.append(await _eval_llm_judge(run, criteria, llm)) + else: + results.append(CriteriaResult( + criteria=criteria, + passed=False, + detail=f"Unknown criteria type: {criteria.type}", + )) + + return results + + +def compute_score(results: list[CriteriaResult]) -> float: + """Returns the fraction of passed criteria (1.0 when no criteria).""" + if not results: + return 1.0 # vacuously true + passed = sum(1 for r in results if r.passed) + return round(passed / len(results), 4) diff --git a/backend/agent_workbench/models.py b/backend/agent_workbench/models.py new file mode 100644 index 0000000..dba67c3 --- /dev/null +++ b/backend/agent_workbench/models.py @@ -0,0 +1,303 @@ +""" +Agent Workbench - Data Models + +SQLModel definitions for AgentDefinition, AgentRun, and AgentEvaluation. +JSON columns store list / dict data as serialized strings in SQLite. 
+""" + +import json +import uuid +from datetime import datetime +from enum import Enum +from pathlib import Path +from typing import Any, Optional + +from pydantic import BaseModel, Field +from sqlmodel import Column, Field as SField, Session, SQLModel, String, create_engine, select, text + + +# ============================================================================ +# ENUMS & EMBEDDED PYDANTIC TYPES +# ============================================================================ + +class RunStatus(str, Enum): + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + + +class CriteriaType(str, Enum): + TOOL_CALLED = "tool_called" # agent called a specific tool during the run + OUTPUT_CONTAINS = "output_contains" # final output contains a substring + NO_ERROR = "no_error" # run completed without error + LLM_JUDGE = "llm_judge" # OpenAI grades the answer via a judge prompt + + +class SuccessCriteria(BaseModel): + """A single evaluatable success criterion for an agent run.""" + type: CriteriaType + value: str = Field(description="Tool name / substring / judge prompt depending on type") + description: str = Field(default="", description="Human-readable explanation") + + +class CriteriaResult(BaseModel): + """Outcome of applying one SuccessCriteria to a completed run.""" + criteria: SuccessCriteria + passed: bool + detail: str = "" + + +# ============================================================================ +# HELPER - random (uuid4) UUID string +# ============================================================================ + +def _new_id() -> str: + return str(uuid.uuid4()) + + +# ============================================================================ +# TABLE MODELS +# ============================================================================ + +class AgentDefinition(SQLModel, table=True): + """Persisted agent blueprint: system prompt + tools + success criteria.""" + __tablename__ = 
"workbench_agent_definitions" + + 
id: str = SField(default_factory=_new_id, primary_key=True) + name: str = SField(index=True, description="Human-readable agent name") + description: str = SField(default="", description="Optional description") + system_prompt: str = SField(description="System prompt sent to the LLM") + requires_input: bool = SField( + default=False, + description="When true, runs must include required_input_value", + ) + required_input_description: str = SField( + default="", + description="Description shown to operators for required runtime input", + ) + # Stored as JSON arrays in TEXT columns + tool_names_json: str = SField( + default="[]", + description="JSON-serialized list of tool names available to this agent", + sa_column=Column(String, name="tool_names"), + ) + success_criteria_json: str = SField( + default="[]", + description="JSON-serialized list of SuccessCriteria dicts", + sa_column=Column(String, name="success_criteria"), + ) + created_at: datetime = SField(default_factory=datetime.now) + updated_at: datetime = SField(default_factory=datetime.now) + + # ------------------------------------------------------------------ + # Convenience properties (not persisted; computed from JSON columns) + # ------------------------------------------------------------------ + + @property + def tool_names(self) -> list[str]: + try: + return json.loads(self.tool_names_json) + except (json.JSONDecodeError, TypeError): + return [] + + @tool_names.setter + def tool_names(self, value: list[str]) -> None: + self.tool_names_json = json.dumps(value) + + @property + def success_criteria(self) -> list[SuccessCriteria]: + try: + raw = json.loads(self.success_criteria_json) + return [SuccessCriteria(**c) for c in raw] + except (json.JSONDecodeError, TypeError, Exception): + return [] + + @success_criteria.setter + def success_criteria(self, value: list[SuccessCriteria]) -> None: + self.success_criteria_json = json.dumps([c.model_dump() for c in value]) + + def to_dict(self) -> dict[str, Any]: + 
return { + "id": self.id, + "name": self.name, + "description": self.description, + "system_prompt": self.system_prompt, + "requires_input": self.requires_input, + "required_input_description": self.required_input_description, + "tool_names": self.tool_names, + "success_criteria": [c.model_dump() for c in self.success_criteria], + "created_at": self.created_at.isoformat(), + "updated_at": self.updated_at.isoformat(), + } + + +class AgentRun(SQLModel, table=True): + """One execution of an AgentDefinition against a user prompt.""" + __tablename__ = "workbench_agent_runs" + + id: str = SField(default_factory=_new_id, primary_key=True) + agent_id: str = SField(foreign_key="workbench_agent_definitions.id", index=True) + input_prompt: str + status: str = SField(default=RunStatus.PENDING.value) + output: Optional[str] = SField(default=None) + agent_snapshot_json: str = SField( + default="{}", + sa_column=Column(String, name="agent_snapshot"), + ) + tools_used_json: str = SField( + default="[]", + sa_column=Column(String, name="tools_used"), + ) + error: Optional[str] = SField(default=None) + created_at: datetime = SField(default_factory=datetime.now) + completed_at: Optional[datetime] = SField(default=None) + + @property + def agent_snapshot(self) -> dict[str, Any]: + try: + raw = json.loads(self.agent_snapshot_json) + if isinstance(raw, dict): + return raw + return {} + except (json.JSONDecodeError, TypeError): + return {} + + @agent_snapshot.setter + def agent_snapshot(self, value: dict[str, Any]) -> None: + self.agent_snapshot_json = json.dumps(value) + + @property + def tools_used(self) -> list[str]: + try: + return json.loads(self.tools_used_json) + except (json.JSONDecodeError, TypeError): + return [] + + @tools_used.setter + def tools_used(self, value: list[str]) -> None: + self.tools_used_json = json.dumps(value) + + def to_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "agent_id": self.agent_id, + "input_prompt": self.input_prompt, + "status": 
self.status, + "output": self.output, + "agent_snapshot": self.agent_snapshot, + "tools_used": self.tools_used, + "error": self.error, + "created_at": self.created_at.isoformat(), + "completed_at": self.completed_at.isoformat() if self.completed_at else None, + } + + +class AgentEvaluation(SQLModel, table=True): + """Evaluation result for a completed AgentRun.""" + __tablename__ = "workbench_agent_evaluations" + + id: str = SField(default_factory=_new_id, primary_key=True) + run_id: str = SField(foreign_key="workbench_agent_runs.id", unique=True, index=True) + criteria_results_json: str = SField( + default="[]", + sa_column=Column(String, name="criteria_results"), + ) + overall_passed: bool = SField(default=False) + score: float = SField(default=0.0, description="Ratio of passed criteria (0.0–1.0)") + evaluated_at: datetime = SField(default_factory=datetime.now) + + @property + def criteria_results(self) -> list[CriteriaResult]: + try: + raw = json.loads(self.criteria_results_json) + return [CriteriaResult(**r) for r in raw] + except (json.JSONDecodeError, TypeError, Exception): + return [] + + @criteria_results.setter + def criteria_results(self, value: list[CriteriaResult]) -> None: + self.criteria_results_json = json.dumps([r.model_dump() for r in value]) + + def to_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "run_id": self.run_id, + "criteria_results": [r.model_dump() for r in self.criteria_results], + "overall_passed": self.overall_passed, + "score": self.score, + "evaluated_at": self.evaluated_at.isoformat(), + } + + +# ============================================================================ +# REQUEST / RESPONSE MODELS (REST layer) +# ============================================================================ + +class AgentDefinitionCreate(BaseModel): + name: str = Field(..., min_length=1, max_length=200) + description: str = Field(default="") + system_prompt: str = Field(..., min_length=1) + requires_input: bool = Field(default=False) + 
required_input_description: str = Field(default="") + tool_names: list[str] = Field(default_factory=list) + success_criteria: list[SuccessCriteria] = Field(default_factory=list) + + +class AgentDefinitionUpdate(BaseModel): + name: Optional[str] = Field(default=None) + description: Optional[str] = Field(default=None) + system_prompt: Optional[str] = Field(default=None) + requires_input: Optional[bool] = Field(default=None) + required_input_description: Optional[str] = Field(default=None) + tool_names: Optional[list[str]] = Field(default=None) + success_criteria: Optional[list[SuccessCriteria]] = Field(default=None) + + +class AgentRunCreate(BaseModel): + input_prompt: str = Field(default="", max_length=10000) + required_input_value: Optional[str] = Field(default=None, max_length=2000) + + +# ============================================================================ +# DATABASE INITIALISATION +# ============================================================================ + +def build_engine(db_path: Path): + db_path.parent.mkdir(parents=True, exist_ok=True) + engine = create_engine(f"sqlite:///{db_path}", echo=False) + SQLModel.metadata.create_all(engine) + _run_migrations(engine) + return engine + + +def _run_migrations(engine) -> None: + """Apply lightweight SQLite migrations for new columns.""" + _ensure_column( + engine, + "workbench_agent_definitions", + "requires_input", + "BOOLEAN NOT NULL DEFAULT 0", + ) + _ensure_column( + engine, + "workbench_agent_definitions", + "required_input_description", + "TEXT NOT NULL DEFAULT ''", + ) + _ensure_column( + engine, + "workbench_agent_runs", + "agent_snapshot", + "TEXT NOT NULL DEFAULT '{}'", + ) + + +def _ensure_column(engine, table_name: str, column_name: str, column_ddl: str) -> None: + with Session(engine) as session: + rows = list(session.exec(text(f"PRAGMA table_info({table_name})")).all()) + columns = {row[1] for row in rows if len(row) > 1} + if column_name in columns: + return + session.exec(text(f"ALTER 
TABLE {table_name} ADD COLUMN {column_name} {column_ddl}")) + session.commit() diff --git a/backend/agent_workbench/service.py b/backend/agent_workbench/service.py new file mode 100644 index 0000000..4592b18 --- /dev/null +++ b/backend/agent_workbench/service.py @@ -0,0 +1,480 @@ +""" +Agent Workbench - Service + +Core business logic: create / run / evaluate agents. +State is persisted in SQLite via SQLModel. + +This module is independent: it imports only from standard library, +pydantic, sqlmodel, langchain, and the workbench's own sub-modules. +The host project injects tools and LLM configuration at startup. +""" + +import os +from datetime import datetime +from pathlib import Path +from typing import Any, Optional + +from sqlmodel import Session, select + +from .evaluator import compute_score +from .evaluator import evaluate_run as _evaluate_criteria +from .models import ( + AgentDefinition, + AgentDefinitionCreate, + AgentDefinitionUpdate, + AgentEvaluation, + AgentRun, + AgentRunCreate, + CriteriaResult, + CriteriaType, + RunStatus, + SuccessCriteria, + build_engine, +) +from .tool_registry import ToolRegistry + +# ============================================================================ +# LLM HELPER - isolated so it stays optional at import time +# ============================================================================ + +def _build_llm(model: str, api_key: str, base_url: str = "") -> Any: + from langchain_openai import ChatOpenAI + return ChatOpenAI( + model=model, + api_key=api_key, + base_url=base_url or None, + temperature=0.0, + ) + + +def _build_react_agent(llm: Any, tools: list[Any], system_prompt: str) -> Any: + from langgraph.prebuilt import create_react_agent + return create_react_agent(llm, tools, prompt=system_prompt) + + +def _append_markdown_output_instruction(system_prompt: str) -> str: + instruction = ( + "Format your final answer as GitHub-flavored Markdown. " + "Use headings, bullet lists, and tables when helpful. 
" + "Do not wrap the entire response in a code block." + ) + base_prompt = (system_prompt or "").strip() + if not base_prompt: + return instruction + return f"{base_prompt}\n\n{instruction}" + + +# ============================================================================ +# WORKBENCH SERVICE +# ============================================================================ + +class WorkbenchService: + """ + Manages the full lifecycle of agent definitions, runs, and evaluations. + + Designed as a deep module: + - Simple public API (create_agent, run_agent, evaluate_run) + - Internal complexity hidden (LangGraph wiring, DB sessions, JSON columns) + + Host project provides: + - tool_registry : populated ToolRegistry instance + - db_path : path to SQLite file (default: backend/data/workbench.db) + - openai_api_key : required for running agents + - openai_model : model name (default: gpt-4o-mini) + - openai_base_url: optional custom endpoint + """ + + def __init__( + self, + tool_registry: ToolRegistry, + db_path: Optional[Path] = None, + openai_api_key: str = "", + openai_model: str = "gpt-4o-mini", + openai_base_url: str = "", + recursion_limit: int = 10, + ) -> None: + self._registry = tool_registry + self._api_key = openai_api_key or os.getenv("OPENAI_API_KEY", "") + self._model = openai_model or os.getenv("OPENAI_MODEL", "gpt-4o-mini") + self._base_url = openai_base_url or os.getenv("OPENAI_BASE_URL", "") + self._recursion_limit = recursion_limit + self._db_path = db_path or ( + Path(__file__).resolve().parents[2] / "data" / "workbench.db" + ) + self._engine = build_engine(self._db_path) + + # LLM is lazy-initialised so the service can be instantiated without + # a valid API key (useful for listing agents / tools only). + self._llm: Any = None + + @property + def llm(self) -> Any: + if self._llm is None: + if not self._api_key: + raise ValueError( + "OPENAI_API_KEY is required to run agents. " + "Set it via environment variable or pass openai_api_key." 
+ ) + self._llm = _build_llm(self._model, self._api_key, self._base_url) + return self._llm + + # ------------------------------------------------------------------ + # Tool introspection + # ------------------------------------------------------------------ + + def list_tools(self) -> list[dict[str, Any]]: + """Return metadata about all registered tools.""" + result: list[dict[str, Any]] = [] + for t in self._registry.available_tools(): + input_schema: dict[str, Any] = {"type": "object", "properties": {}} + args_schema = getattr(t, "args_schema", None) + if args_schema and hasattr(args_schema, "model_json_schema"): + try: + input_schema = args_schema.model_json_schema() + except Exception: + input_schema = {"type": "object", "properties": {}} + result.append({ + "name": t.name, + "description": (t.description or "")[:200], + "input_schema": input_schema, + }) + return result + + def _normalize_tool_names(self, names: list[str]) -> list[str]: + normalized: list[str] = [] + seen: set[str] = set() + for raw in names: + if not isinstance(raw, str): + continue + name = raw.strip() + if not name or name in seen: + continue + normalized.append(name) + seen.add(name) + return normalized + + def _validate_tool_names(self, names: list[str]) -> list[str]: + normalized = self._normalize_tool_names(names) + missing = [name for name in normalized if not self._registry.has(name)] + if missing: + raise ValueError( + "Unknown tool(s): " + + ", ".join(sorted(missing)) + + ". Use workbench_list_tools to inspect available tools." 
+ ) + return normalized + + def _build_agent_snapshot(self, agent: AgentDefinition) -> dict[str, Any]: + return { + "id": agent.id, + "name": agent.name, + "description": agent.description, + "system_prompt": agent.system_prompt, + "requires_input": agent.requires_input, + "required_input_description": agent.required_input_description, + "tool_names": list(agent.tool_names), + "success_criteria": [criteria.model_dump() for criteria in agent.success_criteria], + "captured_at": datetime.now().isoformat(), + } + + def _normalize_input_contract( + self, + requires_input: bool, + required_input_description: str, + ) -> tuple[bool, str]: + normalized_description = (required_input_description or "").strip() + if requires_input and not normalized_description: + raise ValueError( + "required_input_description must be provided when requires_input is true" + ) + if not requires_input: + normalized_description = "" + return requires_input, normalized_description + + def _build_run_user_message( + self, + agent_def: AgentDefinition, + run_request: AgentRunCreate, + ) -> tuple[str, str]: + run_prompt = (run_request.input_prompt or "").strip() + required_input_value = (run_request.required_input_value or "").strip() + message_parts: list[str] = [] + + if run_prompt: + message_parts.append(run_prompt) + + if agent_def.requires_input: + if not required_input_value: + raise ValueError( + "Missing required_input_value for this agent. 
" + f"Expected: {agent_def.required_input_description}" + ) + message_parts.append( + f"Required input ({agent_def.required_input_description}): {required_input_value}" + ) + elif required_input_value: + message_parts.append(f"Additional input: {required_input_value}") + + if not message_parts: + message_parts.append("Proceed with the configured system instructions and tools.") + + return "\n\n".join(message_parts), required_input_value + + def _criteria_from_run_snapshot(self, run: AgentRun) -> list[SuccessCriteria]: + snapshot = run.agent_snapshot + raw = snapshot.get("success_criteria") + if not isinstance(raw, list): + return [] + parsed: list[SuccessCriteria] = [] + for item in raw: + if not isinstance(item, dict): + continue + try: + parsed.append(SuccessCriteria(**item)) + except Exception: + continue + return parsed + + # ------------------------------------------------------------------ + # Agent definitions CRUD + # ------------------------------------------------------------------ + + def create_agent(self, data: AgentDefinitionCreate) -> AgentDefinition: + validated_tool_names = self._validate_tool_names(data.tool_names) + requires_input, required_input_description = self._normalize_input_contract( + data.requires_input, + data.required_input_description, + ) + agent = AgentDefinition( + name=data.name, + description=data.description, + system_prompt=data.system_prompt, + requires_input=requires_input, + required_input_description=required_input_description, + ) + agent.tool_names = validated_tool_names + agent.success_criteria = data.success_criteria + with Session(self._engine) as session: + session.add(agent) + session.commit() + session.refresh(agent) + return agent + + def get_agent(self, agent_id: str) -> Optional[AgentDefinition]: + with Session(self._engine) as session: + return session.get(AgentDefinition, agent_id) + + def list_agents(self) -> list[AgentDefinition]: + with Session(self._engine) as session: + return 
def update_agent(
    self, agent_id: str, data: AgentDefinitionUpdate
) -> Optional[AgentDefinition]:
    """Apply a partial update to a stored agent definition.

    Fields left as ``None`` on ``data`` keep their stored value. The input
    contract is re-normalized on every update, using the stored value for
    whichever half of the contract was not supplied.

    Returns:
        The refreshed agent, or ``None`` when ``agent_id`` is unknown.

    Raises:
        ValueError: Propagated from ``_normalize_input_contract`` or
            ``_validate_tool_names``; nothing is committed in that case.
    """
    with Session(self._engine) as session:
        record = session.get(AgentDefinition, agent_id)
        if record is None:
            return None

        # Plain scalar fields: copy over only what the caller supplied.
        for field_name in ("name", "description", "system_prompt"):
            new_value = getattr(data, field_name)
            if new_value is not None:
                setattr(record, field_name, new_value)

        # Input contract: fall back to the stored value for missing halves.
        wants_input = data.requires_input
        if wants_input is None:
            wants_input = record.requires_input
        input_description = data.required_input_description
        if input_description is None:
            input_description = record.required_input_description
        contract = self._normalize_input_contract(wants_input, input_description)
        record.requires_input, record.required_input_description = contract

        if data.tool_names is not None:
            record.tool_names = self._validate_tool_names(data.tool_names)
        if data.success_criteria is not None:
            record.success_criteria = data.success_criteria

        record.updated_at = datetime.now()
        session.add(record)
        session.commit()
        session.refresh(record)
        return record
async def run_agent(
    self,
    agent_id: str,
    run_request: AgentRunCreate,
) -> AgentRun:
    """
    Execute an AgentDefinition against a user prompt using LangGraph ReAct.

    Steps:
        1. Load the AgentDefinition, validate its tool names and compose
           the user message.
        2. Persist an AgentRun in RUNNING state, carrying a snapshot of the
           agent and of the inputs used for this run.
        3. Resolve tools from the registry and build a fresh ReAct agent
           (stateless; each run is independent).
        4. Invoke the agent.
        5. Persist the outcome (COMPLETED or FAILED) and return the run.

    Args:
        agent_id: Id of the agent definition to execute.
        run_request: Prompt and optional input value for this run.

    Returns:
        The persisted AgentRun in COMPLETED or FAILED state. Execution
        errors are recorded on the run (``status``/``error``) rather than
        raised.

    Raises:
        ValueError: If the agent does not exist. Errors raised while
            validating tool names or composing the user message also
            propagate, because they occur before any run row is created.
    """
    # -- Load definition --
    agent_def = self.get_agent(agent_id)
    if agent_def is None:
        raise ValueError(f"Agent '{agent_id}' not found")

    validated_tool_names = self._validate_tool_names(agent_def.tool_names)
    agent_snapshot = self._build_agent_snapshot(agent_def)
    user_message, normalized_required_input = self._build_run_user_message(agent_def, run_request)
    normalized_prompt = (run_request.input_prompt or "").strip()
    agent_snapshot["input_prompt"] = normalized_prompt
    agent_snapshot["required_input_value"] = normalized_required_input
    agent_snapshot["composed_user_message"] = user_message

    # -- Persist the run in RUNNING state before executing --
    run = AgentRun(
        agent_id=agent_id,
        input_prompt=normalized_prompt,
        status=RunStatus.RUNNING.value,
    )
    run.agent_snapshot = agent_snapshot
    with Session(self._engine) as session:
        session.add(run)
        session.commit()
        session.refresh(run)

    run_id = run.id

    # -- Execute --
    try:
        tools = self._registry.resolve(validated_tool_names)
        runtime_system_prompt = _append_markdown_output_instruction(agent_def.system_prompt)
        react = _build_react_agent(self.llm, tools, runtime_system_prompt)

        result = await react.ainvoke(
            {"messages": [("user", user_message)]},
            config={"recursion_limit": self._recursion_limit},
        )

        final_msg = result["messages"][-1]
        if hasattr(final_msg, "content"):
            output = final_msg.content
            if isinstance(output, list):
                # Message content can be a list of blocks (plain strings or
                # dicts carrying a "text" entry) instead of one string.
                # Flatten it so a single text value is persisted.
                output = "".join(
                    block if isinstance(block, str) else str(block.get("text") or "")
                    for block in output
                    if isinstance(block, (str, dict))
                )
        else:
            output = str(final_msg)

        # Collect tool names that were used
        tools_used: list[str] = []
        for msg in result["messages"]:
            for tc in getattr(msg, "tool_calls", None) or []:
                name = tc.get("name", "") if isinstance(tc, dict) else getattr(tc, "name", "")
                if name:
                    tools_used.append(name)

        # -- Persist completion --
        with Session(self._engine) as session:
            db_run = session.get(AgentRun, run_id)
            if db_run:
                db_run.status = RunStatus.COMPLETED.value
                db_run.output = output
                db_run.tools_used = list(dict.fromkeys(tools_used))  # deduplicate, preserve order
                db_run.completed_at = datetime.now()
                session.add(db_run)
                session.commit()
                session.refresh(db_run)
                return db_run

    except Exception as exc:
        # Record the failure on the run instead of surfacing it to the caller.
        with Session(self._engine) as session:
            db_run = session.get(AgentRun, run_id)
            if db_run:
                db_run.status = RunStatus.FAILED.value
                db_run.error = str(exc)
                db_run.completed_at = datetime.now()
                session.add(db_run)
                session.commit()
                session.refresh(db_run)
                return db_run
        # The run row is gone, so there is nowhere to record the error.
        raise

    # Reached only when the run row could not be re-loaded after a
    # successful execution (e.g. it was deleted meanwhile).
    return self.get_run(run_id)  # type: ignore[return-value]
class ToolRegistry:
    """
    Maps string names to LangChain StructuredTool instances.

    The workbench itself has no knowledge of where tools come from.
    The host project registers tools at startup (dependency injection).

    Usage:
        registry = ToolRegistry()
        registry.register(my_structured_tool)   # from a StructuredTool
        registry.register_all(list_of_tools)    # bulk
        tools = registry.resolve(["csv_list_tickets", "csv_search_tickets"])
        "csv_list_tickets" in registry          # membership test
    """

    def __init__(self) -> None:
        # Insertion-ordered mapping of tool name -> tool object.
        self._tools: dict[str, Any] = {}

    # ------------------------------------------------------------------
    # Registration
    # ------------------------------------------------------------------

    def register(self, tool: Any) -> None:
        """Register a single tool under its ``.name``.

        Registering a second tool with the same name replaces the first.

        Raises:
            ValueError: If ``tool`` has no non-empty string ``.name``.
        """
        name = getattr(tool, "name", None)
        if not name or not isinstance(name, str):
            raise ValueError(f"Tool must have a string .name attribute, got: {tool!r}")
        self._tools[name] = tool

    def register_all(self, tools: list[Any]) -> None:
        """Bulk-register tools; any iterable of tools is accepted."""
        for tool in tools:
            self.register(tool)

    # ------------------------------------------------------------------
    # Resolution
    # ------------------------------------------------------------------

    def resolve(self, names: list[str]) -> list[Any]:
        """
        Return the registered tools for the requested names.

        Silently skips names that are not registered so that persisted
        AgentDefinitions don't break when a tool is unregistered.
        Repeated names yield the tool only once (first occurrence wins),
        so the result never contains the same tool twice.
        """
        unique_names = dict.fromkeys(names)  # de-duplicate, keep request order
        return [self._tools[name] for name in unique_names if name in self._tools]

    def available_names(self) -> list[str]:
        """Sorted list of all registered tool names."""
        return sorted(self._tools)

    def available_tools(self) -> list[Any]:
        """All registered tools, in registration order."""
        return list(self._tools.values())

    def has(self, name: str) -> bool:
        """Return True when a tool is registered under ``name``."""
        return name in self._tools

    def __contains__(self, name: object) -> bool:
        """Support ``name in registry`` (same answer as :meth:`has`)."""
        return name in self._tools

    def __len__(self) -> int:
        """Number of registered tools."""
        return len(self._tools)
@app.route("/api/workbench/agents", methods=["POST"])
async def workbench_create_agent():
    """Create a new agent definition from the JSON request body.

    Responses:
        201 with the created agent on success.
        400 when the body is not a JSON object or fails validation.
        500 on any other error.
    """
    try:
        data = await request.get_json()
        # The body may be missing or may be valid JSON that is not an
        # object (list, string, ...). Unpacking it with ** would raise
        # TypeError and surface as a 500, so reject it as a client error.
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400
        agent_def = workbench_service.create_agent(AgentDefinitionCreate(**data))
        return jsonify(agent_def.to_dict()), 201
    except (ValidationError, ValueError) as exc:
        # Schema violations and service-level validation are both client errors.
        return jsonify({"error": str(exc)}), 400
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
@app.route("/api/workbench/runs", methods=["GET"])
async def workbench_list_all_runs():
    """List runs across all agents.

    Query parameters:
        limit: Maximum number of runs to return (default 50). The value is
            clamped to 1..500, matching the limits applied by the
            ``workbench_list_runs`` operation and advertised by the
            ui-config endpoint.
    """
    requested_limit = request.args.get("limit", 50, type=int)
    # Clamp so that zero, negative or very large values cannot turn into an
    # empty or unbounded query.
    limit = min(max(requested_limit, 1), 500)
    runs = workbench_service.list_runs(limit=limit)
    return jsonify({"runs": [r.to_dict() for r in runs]})
@operation(
    name="workbench_list_agents",
    description="List all Agent Fabric agent definitions",
    http_method="GET",
    http_path="/api/workbench/agents",
)
async def op_workbench_list_agents() -> list[dict[str, Any]]:
    """Return every persisted workbench agent definition as a plain dict."""
    service = _get_workbench_service()
    serialized: list[dict[str, Any]] = []
    for definition in service.list_agents():
        serialized.append(definition.to_dict())
    return serialized
@operation(
    name="workbench_list_runs",
    description="List Agent Fabric runs, optionally filtered by agent id",
    http_method="GET",
    http_path="/api/workbench/runs",
)
async def op_workbench_list_runs(
    agent_id: str | None = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """List recent workbench runs, optionally restricted to one agent.

    ``limit`` is clamped to the range 1..500 before it reaches the service.
    """
    # Same clamp as min(max(limit, 1), 500), spelled out branch by branch.
    if limit < 1:
        bounded_limit = 1
    elif limit > 500:
        bounded_limit = 500
    else:
        bounded_limit = limit

    service = _get_workbench_service()
    found_runs = service.list_runs(agent_id=agent_id, limit=bounded_limit)
    return [entry.to_dict() for entry in found_runs]
task_service = _task_service csv_ticket_service = _csv_service @@ -335,5 +495,17 @@ async def op_csv_sla_breach_tickets( "op_csv_ticket_stats", "op_csv_ticket_fields", "op_csv_sla_breach_tickets", + "op_workbench_list_tools", + "op_workbench_list_agents", + "op_workbench_create_agent", + "op_workbench_get_agent", + "op_workbench_update_agent", + "op_workbench_delete_agent", + "op_workbench_run_agent", + "op_workbench_list_agent_runs", + "op_workbench_list_runs", + "op_workbench_get_run", + "op_workbench_evaluate_run", + "op_workbench_get_evaluation", "CSV_TICKET_FIELDS", ] diff --git a/backend/test_agents.py b/backend/test_agents.py deleted file mode 100644 index d5b2f02..0000000 --- a/backend/test_agents.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for LangGraph agent integration. - -This script tests: -1. Agent service initialization (without OpenAI) -2. Operation to LangChain tool conversion -3. MCP tool schema generation - -Run from backend directory: - python test_agents.py -""" - -import sys -from pathlib import Path - -# Add backend to path -sys.path.insert(0, str(Path(__file__).parent)) - -from api_decorators import LANGCHAIN_AVAILABLE, get_langchain_tools, get_operations -from tasks import TaskService - - -def test_operation_registry(): - """Test that operations are properly registered.""" - print("✓ Testing operation registry...") - ops = get_operations() - print(f" Found {len(ops)} registered operations:") - for name in sorted(ops.keys()): - print(f" - {name}") - assert len(ops) > 0, "No operations registered!" 
- print() - -def test_langchain_integration(): - """Test LangChain tool conversion.""" - print("✓ Testing LangChain integration...") - - if not LANGCHAIN_AVAILABLE: - print(" ⚠ LangChain not available, skipping tool conversion test") - return - - try: - tools = get_langchain_tools() - print(f" Converted {len(tools)} operations to LangChain tools:") - for tool in tools[:5]: # Show first 5 - print(f" - {tool.name}: {tool.description[:60]}...") - print() - except Exception as e: - print(f" ✗ Error converting to LangChain tools: {e}") - raise - -def test_agent_import(): - """Test that agent service can be imported.""" - print("✓ Testing agent module import...") - - try: - from agents import AgentRequest, AgentResponse, AgentService - print(" Successfully imported AgentRequest, AgentResponse, AgentService") - - # Test creating a request - request = AgentRequest( - prompt="Test prompt", - agent_type="task_assistant" - ) - print(f" Created test request: {request.prompt}") - print() - except ImportError as e: - print(f" ⚠ Could not import agents module: {e}") - print(" This is expected if OpenAI dependencies are not configured") - print() - -def test_task_service(): - """Test that task service works.""" - print("✓ Testing task service integration...") - - task_service = TaskService() - - # Get stats - stats = task_service.get_task_stats() - print(f" Task stats: {stats.total} total, {stats.completed} completed, {stats.pending} pending") - - # List tasks - tasks = task_service.list_tasks() - print(f" Found {len(tasks)} tasks") - print() - -def main(): - """Run all tests.""" - print("=" * 70) - print("LangGraph Agent Integration Tests") - print("=" * 70) - print() - - try: - test_operation_registry() - test_langchain_integration() - test_agent_import() - test_task_service() - - print("=" * 70) - print("✓ All tests passed!") - print("=" * 70) - print() - print("Next steps:") - print("1. Copy .env.example to .env") - print("2. Configure OPENAI_API_KEY in .env") - print("3. 
Start the server: python app.py") - print("4. Test agent: POST /api/agents/run with {\"prompt\": \"List all tasks\"}") - print() - - except Exception as e: - print(f"\n✗ Test failed: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - -if __name__ == "__main__": - main() diff --git a/backend/test_mcp_client.py b/backend/test_mcp_client.py deleted file mode 100644 index d776b3c..0000000 --- a/backend/test_mcp_client.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -Test FastMCP client integration with external MCP server. - -Tests: -1. Connect to MCP server and list tools -2. Convert MCP tools to LangChain format -3. Run agent with MCP tools - -Usage: - cd backend && python test_mcp_client.py -""" - -import asyncio - -# Import from agents module -from agents import MCP_SERVER_URL, AgentRequest, AgentService, _mcp_tool_to_langchain -from fastmcp import Client as MCPClient - - -async def test_mcp_connection(): - """Test basic connection to MCP server.""" - print("=" * 60) - print("TEST 1: Connect to MCP server") - print("=" * 60) - - async with MCPClient(MCP_SERVER_URL) as client: - await client.ping() - print(f"✓ Connected to {MCP_SERVER_URL}") - - # List tools - tools = await client.list_tools() - print(f"✓ Found {len(tools)} tools:") - for tool in tools: - desc = tool.description[:50] + "..." 
if len(tool.description or "") > 50 else tool.description - print(f" - {tool.name}: {desc}") - - return tools - - -async def test_tool_conversion(): - """Test converting MCP tools to LangChain format.""" - print("\n" + "=" * 60) - print("TEST 2: Convert MCP tools to LangChain") - print("=" * 60) - - async with MCPClient(MCP_SERVER_URL) as client: - tools = await client.list_tools() - - lc_tools = [] - for tool in tools: - lc_tool = _mcp_tool_to_langchain(client, tool) - lc_tools.append(lc_tool) - print(f"✓ Converted: {lc_tool.name}") - if hasattr(lc_tool.args_schema, 'model_fields'): - print(f" Args schema: {list(lc_tool.args_schema.model_fields.keys())}") - - print(f"\n✓ Converted {len(lc_tools)} tools to LangChain format") - return lc_tools - - -async def test_call_mcp_tool(): - """Test calling an MCP tool directly.""" - print("\n" + "=" * 60) - print("TEST 3: Call MCP tool directly") - print("=" * 60) - - async with MCPClient(MCP_SERVER_URL) as client: - tools = await client.list_tools() - - if not tools: - print("✗ No tools available to test") - return - - # Try to find a simple tool to call (e.g., list or get) - test_tool = None - for tool in tools: - if "list" in tool.name.lower() or "get" in tool.name.lower(): - test_tool = tool - break - - if not test_tool: - test_tool = tools[0] - - print(f"Calling tool: {test_tool.name}") - print(f"Input schema: {test_tool.inputSchema}") - - try: - # Call with empty args (may fail if required params) - result = await client.call_tool(test_tool.name, {}) - print(f"✓ Result: {result}") - except Exception as e: - print(f"✗ Tool call failed (may need args): {e}") - - -async def test_agent_with_mcp(): - """Test running agent with MCP tools loaded.""" - print("\n" + "=" * 60) - print("TEST 4: Run agent with MCP tools") - print("=" * 60) - - service = AgentService() - - # First run will load MCP tools - result = await service.run_agent( - AgentRequest(prompt="List all available tools and briefly describe what each one does") - ) 
- - print(f"Agent response:\n{result.result[:500]}..." if len(result.result) > 500 else f"Agent response:\n{result.result}") - print(f"\nTools used: {result.tools_used}") - print(f"Error: {result.error}") - - # Clean up - await service.close() - - return result - - -async def main(): - """Run all tests.""" - print("\n🧪 FastMCP Client Integration Tests\n") - print(f"MCP Server: {MCP_SERVER_URL}\n") - - try: - await test_mcp_connection() - await test_tool_conversion() - await test_call_mcp_tool() - await test_agent_with_mcp() - - print("\n" + "=" * 60) - print("✓ All tests completed!") - print("=" * 60) - - except Exception as e: - print(f"\n✗ Test failed with error: {e}") - import traceback - traceback.print_exc() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..fb1d9ba --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,6 @@ +"""Shared fixtures and path setup for backend tests.""" +import sys +from pathlib import Path + +# Ensure backend package is importable from the tests directory +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) diff --git a/backend/tests/test_agents.py b/backend/tests/test_agents.py new file mode 100644 index 0000000..6d86a9c --- /dev/null +++ b/backend/tests/test_agents.py @@ -0,0 +1,20 @@ +"""Tests for operation registry and LangChain tool conversion.""" + +from api_decorators import LANGCHAIN_AVAILABLE, get_langchain_tools, get_operations + + +def test_operation_registry(): + """Test that operations are properly registered.""" + ops = get_operations() + assert len(ops) > 0, "No operations registered!" 
def _parse_ticket(data: dict) -> tuple[Ticket, list[WorkLog]]:
    """Split one raw ticket dict into a Ticket and its WorkLog models.

    The input dict is left unmodified; the ``work_logs`` entry (if any) is
    separated from the ticket fields before validation.
    """
    ticket_fields = {key: value for key, value in data.items() if key != "work_logs"}
    log_entries = data.get("work_logs", [])
    ticket = Ticket.model_validate(ticket_fields)
    return ticket, [WorkLog.model_validate(entry) for entry in log_entries]
needs_reminder: - candidates_needing_reminder.append((i, ticket, candidate)) - - print("-" * 70) - print() - - # Summary - print("=" * 70) - print("TICKETS NEEDING REMINDER:") - print("=" * 70) - - if not candidates_needing_reminder: - print(" None") - else: - for idx, ticket, candidate in candidates_needing_reminder: - reminded_msg = f" (reminded {candidate.reminder_count}x)" if candidate.was_reminded_before else "" - print(f" #{idx}: {ticket.summary}") - print(f" ID: {ticket.id}") - print(f" Priority: {ticket.priority.value} | Group: {ticket.assigned_group}") - print(f" Overdue by: {candidate.minutes_since_creation - candidate.sla_deadline_minutes} minutes{reminded_msg}") - print() - - print("=" * 70) - print("TEST PASSED: All tickets parsed and analyzed successfully!") - print("=" * 70) +def test_all_tickets_parse(): + """All sample tickets parse into valid Pydantic models.""" + for raw in SAMPLE_TICKETS: + ticket, work_logs = _parse_ticket(raw) + assert ticket.id + assert ticket.priority + + +def test_critical_unassigned_overdue(): + """Critical ticket with no assignee that is overdue needs a reminder.""" + raw = SAMPLE_TICKETS[2] # "CRITICAL: Production database down" + ticket, work_logs = _parse_ticket(raw) + candidate = build_reminder_candidate(ticket, work_logs, now=TEST_NOW) + assert candidate.is_overdue + assert is_assigned_without_assignee(ticket) + + +def test_resolved_ticket_no_reminder(): + """Resolved ticket should not trigger a reminder.""" + raw = SAMPLE_TICKETS[6] # "Password reset request" + ticket, work_logs = _parse_ticket(raw) + assert not is_assigned_without_assignee(ticket) + + +def test_critical_with_assignee_no_reminder(): + """Critical ticket with an assignee does not need a reminder.""" + raw = SAMPLE_TICKETS[7] # "CRITICAL: Network switch failure" + ticket, work_logs = _parse_ticket(raw) + assert not is_assigned_without_assignee(ticket) + + +def test_high_overdue_reminder_count(): + """High-priority overdue ticket counts existing 
reminders.""" + raw = SAMPLE_TICKETS[5] # "Email not syncing on mobile" + ticket, work_logs = _parse_ticket(raw) + candidate = build_reminder_candidate(ticket, work_logs, now=TEST_NOW) + assert candidate.is_overdue + assert candidate.reminder_count == 2 -if __name__ == "__main__": - main() +def test_low_new_within_sla(): + """Low-priority new ticket within SLA is not overdue.""" + raw = SAMPLE_TICKETS[4] # "Request for second monitor" + ticket, work_logs = _parse_ticket(raw) + candidate = build_reminder_candidate(ticket, work_logs, now=TEST_NOW) + assert not candidate.is_overdue diff --git a/backend/test_usecase_demo.py b/backend/tests/test_usecase_demo.py similarity index 92% rename from backend/test_usecase_demo.py rename to backend/tests/test_usecase_demo.py index 41ddf79..d75529a 100644 --- a/backend/test_usecase_demo.py +++ b/backend/tests/test_usecase_demo.py @@ -1,10 +1,21 @@ +""" +Tests for UsecaseDemoRunService. + +Run from backend directory: + python -m pytest tests/test_usecase_demo.py +""" + import asyncio import unittest from unittest.mock import AsyncMock, patch -from agents import AgentResponse -from usecase_demo import UsecaseDemoRunCreate, UsecaseDemoRunService, UsecaseDemoRunStatus import usecase_demo +from agents import AgentResponse +from usecase_demo import ( + UsecaseDemoRunCreate, + UsecaseDemoRunService, + UsecaseDemoRunStatus, +) class UsecaseDemoRunServiceTests(unittest.IsolatedAsyncioTestCase): diff --git a/backend/tests/test_workbench_integration_e2e.py b/backend/tests/test_workbench_integration_e2e.py new file mode 100644 index 0000000..baebd07 --- /dev/null +++ b/backend/tests/test_workbench_integration_e2e.py @@ -0,0 +1,216 @@ +"""End-to-end verification for workbench_integration via REST endpoints.""" + +import sys +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import patch + +# Ensure backend modules are importable when running this file directly. 
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +import app as backend_app_module +from agent_workbench import WorkbenchService +from workbench_integration import _tool_registry + + +class _ToolCallMessage: + def __init__(self, tool_name: str) -> None: + self.tool_calls = [{"name": tool_name}] + + +class _FinalMessage: + def __init__(self, content: str) -> None: + self.content = content + + +class _FakeReactAgent: + def __init__(self, tools: list[object]) -> None: + self._tools = tools + + async def ainvoke(self, _payload: dict, config: dict | None = None) -> dict: + _ = config + user_message = "" + messages = _payload.get("messages", []) + if messages: + first = messages[0] + if isinstance(first, (list, tuple)) and len(first) >= 2: + user_message = str(first[1]) + + tool = next( + (item for item in self._tools if getattr(item, "name", "") == "csv_ticket_stats"), + None, + ) + if tool is None: + raise AssertionError("csv_ticket_stats was not resolved from workbench registry") + + stats = await tool.ainvoke({}) + total = stats.get("total", 0) if isinstance(stats, dict) else 0 + output = f"Used csv_ticket_stats successfully. total={total}. 
context={user_message}" + return { + "messages": [ + _ToolCallMessage("csv_ticket_stats"), + _FinalMessage(output), + ] + } + + +def _fake_build_react_agent(_llm: object, tools: list[object], _prompt: str) -> _FakeReactAgent: + if "GitHub-flavored Markdown" not in _prompt: + raise AssertionError("Expected markdown output instruction in runtime system prompt") + return _FakeReactAgent(tools) + + +class WorkbenchIntegrationE2ETests(unittest.IsolatedAsyncioTestCase): + async def asyncSetUp(self) -> None: + self._tmpdir = TemporaryDirectory() + self._original_service = backend_app_module.workbench_service + + backend_app_module.workbench_service = WorkbenchService( + tool_registry=_tool_registry, + db_path=Path(self._tmpdir.name) / "workbench-e2e.db", + openai_api_key="test-key", + ) + # Avoid any real network/model dependency in this end-to-end API flow test. + backend_app_module.workbench_service._llm = object() + + async def asyncTearDown(self) -> None: + backend_app_module.workbench_service = self._original_service + self._tmpdir.cleanup() + + async def test_create_run_and_evaluate_agent_with_csv_tool(self) -> None: + with patch("agent_workbench.service._build_react_agent", new=_fake_build_react_agent): + async with backend_app_module.app.test_app() as test_app: + client = test_app.test_client() + + ui_config_resp = await client.get("/api/workbench/ui-config") + self.assertEqual(ui_config_resp.status_code, 200) + ui_config_data = await ui_config_resp.get_json() + endpoint_paths = [endpoint["path"] for endpoint in ui_config_data["endpoints"]] + self.assertIn("/api/workbench/agents", endpoint_paths) + self.assertIn("/api/workbench/agents/{agent_id}/runs", endpoint_paths) + self.assertIn("/api/workbench/runs/{run_id}/evaluate", endpoint_paths) + self.assertIn("tool_called", ui_config_data["criteria_types"]) + self.assertIn("completed", ui_config_data["run_statuses"]) + + tools_resp = await client.get("/api/workbench/tools") + self.assertEqual(tools_resp.status_code, 
200) + tools_payload = await tools_resp.get_json() + tool_names = [tool["name"] for tool in tools_payload["tools"]] + self.assertIn("csv_ticket_stats", tool_names) + self.assertIn("csv_ticket_fields", tool_names) + self.assertFalse(any(name.startswith("list_task") for name in tool_names)) + self.assertFalse(any(name.startswith("create_task") for name in tool_names)) + self.assertFalse(any(name.startswith("workbench_") for name in tool_names)) + + list_tickets_tool = next( + item for item in tools_payload["tools"] if item["name"] == "csv_list_tickets" + ) + input_props = list_tickets_tool.get("input_schema", {}).get("properties", {}) + self.assertIn("status", input_props) + self.assertIn("limit", input_props) + + create_payload = { + "name": "CSV stats verifier", + "description": "E2E check for workbench integration", + "system_prompt": "Use csv_ticket_stats and report total.", + "tool_names": ["csv_ticket_stats"], + "success_criteria": [ + { + "type": "tool_called", + "value": "csv_ticket_stats", + "description": "Agent must call csv_ticket_stats", + }, + { + "type": "output_contains", + "value": "total=", + "description": "Output should contain total count", + }, + ], + } + create_resp = await client.post("/api/workbench/agents", json=create_payload) + create_data = await create_resp.get_json() + self.assertEqual(create_resp.status_code, 201, create_data) + agent_id = create_data["id"] + + run_resp = await client.post( + f"/api/workbench/agents/{agent_id}/runs", + json={"input_prompt": "Get me the current CSV ticket total."}, + ) + run_data = await run_resp.get_json() + self.assertEqual(run_resp.status_code, 200, run_data) + self.assertEqual(run_data["status"], "completed") + self.assertIn("csv_ticket_stats", run_data["tools_used"]) + self.assertIn("total=", run_data["output"] or "") + self.assertIn("csv_ticket_stats", run_data["agent_snapshot"].get("tool_names", [])) + + evaluate_resp = await client.post(f"/api/workbench/runs/{run_data['id']}/evaluate") + 
evaluate_data = await evaluate_resp.get_json() + self.assertEqual(evaluate_resp.status_code, 200, evaluate_data) + self.assertTrue(evaluate_data["overall_passed"]) + self.assertEqual(evaluate_data["score"], 1.0) + + async def test_required_input_agent_run_validation_and_context(self) -> None: + with patch("agent_workbench.service._build_react_agent", new=_fake_build_react_agent): + async with backend_app_module.app.test_app() as test_app: + client = test_app.test_client() + + invalid_create_resp = await client.post( + "/api/workbench/agents", + json={ + "name": "Needs Input Invalid", + "description": "", + "system_prompt": "Use CSV tools.", + "requires_input": True, + "required_input_description": "", + "tool_names": ["csv_ticket_stats"], + "success_criteria": [], + }, + ) + self.assertEqual(invalid_create_resp.status_code, 400) + + create_resp = await client.post( + "/api/workbench/agents", + json={ + "name": "Needs Input", + "description": "", + "system_prompt": "Use CSV tools.", + "requires_input": True, + "required_input_description": "Ticket INC number", + "tool_names": ["csv_ticket_stats"], + "success_criteria": [], + }, + ) + create_data = await create_resp.get_json() + self.assertEqual(create_resp.status_code, 201, create_data) + self.assertTrue(create_data["requires_input"]) + self.assertEqual(create_data["required_input_description"], "Ticket INC number") + agent_id = create_data["id"] + + missing_input_resp = await client.post( + f"/api/workbench/agents/{agent_id}/runs", + json={"input_prompt": ""}, + ) + self.assertEqual(missing_input_resp.status_code, 400) + + run_resp = await client.post( + f"/api/workbench/agents/{agent_id}/runs", + json={"required_input_value": "INC-12345"}, + ) + run_data = await run_resp.get_json() + self.assertEqual(run_resp.status_code, 200, run_data) + self.assertEqual(run_data["status"], "completed") + self.assertEqual(run_data["input_prompt"], "") + self.assertIn("INC-12345", run_data["output"] or "") + self.assertEqual( + 
run_data["agent_snapshot"].get("required_input_value"), + "INC-12345", + ) + self.assertIn( + "Required input (Ticket INC number): INC-12345", + run_data["agent_snapshot"].get("composed_user_message", ""), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/backend/usecase_demo.py b/backend/usecase_demo.py index 9d16e5b..935625c 100644 --- a/backend/usecase_demo.py +++ b/backend/usecase_demo.py @@ -18,9 +18,8 @@ from typing import Any from uuid import uuid4 -from pydantic import BaseModel, Field, field_validator - from agents import AgentRequest, agent_service +from pydantic import BaseModel, Field, field_validator USECASE_DEMO_AGENT_TIMEOUT_SECONDS = float( os.getenv("USECASE_DEMO_AGENT_TIMEOUT_SECONDS", "300") @@ -134,6 +133,11 @@ def _extract_columns(rows: list[dict[str, Any]]) -> list[str]: return columns +def _is_sla_breach_prompt(prompt: str) -> bool: + normalized = prompt.lower() + return "csv_sla_breach_tickets" in normalized or "sla breach" in normalized + + class UsecaseDemoRunService: """In-memory run orchestration with polling-friendly status updates.""" @@ -188,17 +192,28 @@ async def _execute_run(self, run_id: str) -> None: error=None, ) - # Enforce a predictable output block for table rendering. - structured_prompt = ( - f"{run.prompt}\n\n" - "Antwortformat:\n" - "- Führe die Anfrage mit möglichst wenigen Tool-Aufrufen aus.\n" - "- Nutze kompakte fields und sinnvolle limits.\n" - "- Fordere notes/resolution nur bei explizitem Bedarf an.\n" - "- Gib einen JSON-Codeblock mit {\"rows\": [...]} zurück.\n" - "- Falls keine sinnvollen Zeilen existieren, gib {\"rows\": []} zurück.\n" - "- Optional danach: kurze Zusammenfassung in 2-4 Stichpunkten." 
- ) + if _is_sla_breach_prompt(run.prompt): + structured_prompt = ( + f"{run.prompt}\n\n" + "Antwortformat für SLA-Breach Usecase:\n" + "- Rufe csv_sla_breach_tickets als primäre Quelle auf.\n" + "- Bevorzuge einen einzelnen Tool-Aufruf; keine unnötigen Tool-Schleifen.\n" + "- Liefere ausschließlich kurze Next-Actions als Markdown (max. 6 Bullet Points).\n" + "- Keine JSON-Blöcke zurückgeben.\n" + "- Fokus: Priorisierung, Verantwortliche Gruppen, sofortige Eskalationsschritte." + ) + else: + # Enforce a predictable output block for table rendering. + structured_prompt = ( + f"{run.prompt}\n\n" + "Antwortformat:\n" + "- Führe die Anfrage mit möglichst wenigen Tool-Aufrufen aus.\n" + "- Nutze kompakte fields und sinnvolle limits.\n" + "- Fordere notes/resolution nur bei explizitem Bedarf an.\n" + "- Gib einen JSON-Codeblock mit {\"rows\": [...]} zurück.\n" + "- Falls keine sinnvollen Zeilen existieren, gib {\"rows\": []} zurück.\n" + "- Optional danach: kurze Zusammenfassung in 2-4 Stichpunkten." + ) try: response = await asyncio.wait_for( diff --git a/backend/workbench_integration.py b/backend/workbench_integration.py new file mode 100644 index 0000000..08966a9 --- /dev/null +++ b/backend/workbench_integration.py @@ -0,0 +1,63 @@ +""" +Workbench Integration + +Wires the project's tools into the Agent Fabric module and exposes a +singleton WorkbenchService ready to use in app.py. 
+ +Separation of concerns: + agent_workbench/ - independent module, knows nothing about this project + workbench_integration.py - knows about both; bridges the gap +""" + +import os +from pathlib import Path + +# Ensure operations are loaded so @operation decorators run +import operations # noqa: F401 + +from agent_workbench import ToolRegistry, WorkbenchService +from api_decorators import get_langchain_tools + +# ============================================================================ +# BUILD TOOL REGISTRY +# ============================================================================ + +def _build_registry() -> ToolRegistry: + """ + Populate a ToolRegistry with all tools available in this project. + + Sources: + 1. All @operation-decorated functions via api_decorators.get_langchain_tools() + Exposed to Agent Fabric: csv_* ticket operations only. + + The registry is built once at startup and shared with WorkbenchService. + """ + registry = ToolRegistry() + try: + all_tools = get_langchain_tools() + ticket_tools = [ + tool for tool in all_tools + if getattr(tool, "name", "").startswith("csv_") + ] + registry.register_all(ticket_tools) + except Exception as exc: + import logging + logging.getLogger(__name__).warning("Could not load langchain tools: %s", exc) + return registry + + +# ============================================================================ +# SINGLETON SERVICE +# ============================================================================ + +_tool_registry = _build_registry() + +workbench_service = WorkbenchService( + tool_registry=_tool_registry, + db_path=Path(__file__).parent / "data" / "workbench.db", + openai_api_key=os.getenv("OPENAI_API_KEY", ""), + openai_model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"), + openai_base_url=os.getenv("OPENAI_BASE_URL", ""), +) + +__all__ = ["workbench_service", "_tool_registry"] diff --git a/explain.drawio b/explain.drawio index 739bb57..aaee8ca 100644 --- a/explain.drawio +++ b/explain.drawio @@ -1,6 
+1,6 @@ - + @@ -97,4 +97,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index a7803ed..4a7f3b9 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -20,6 +20,7 @@ import { DataHistogram24Regular, Info24Regular, Table24Regular, + Wrench24Regular, } from '@fluentui/react-icons' import { Navigate, Route, Routes, useLocation, useNavigate } from 'react-router-dom' import AgentChat from './features/agent/AgentChat' @@ -28,6 +29,7 @@ import FieldsDocs from './features/fields/FieldsDocs' import { USECASE_DEMO_DEFINITIONS } from './features/usecase-demo/demoDefinitions' import UsecaseDemoPage from './features/usecase-demo/UsecaseDemoPage' import KitchenSink from './features/kitchensink/KitchenSink' +import WorkbenchPage from './features/workbench/WorkbenchPage' const useStyles = makeStyles({ app: { @@ -79,6 +81,7 @@ export default function App() { ...usecaseTabs, { value: 'kitchensink', label: 'Kitchen Sink', icon: , path: '/kitchensink', testId: 'tab-kitchensink' }, { value: 'fields', label: 'Fields', icon: , path: '/fields', testId: 'tab-fields' }, + { value: 'workbench', label: 'Agent Fabric', icon: , path: '/workbench', testId: 'tab-workbench' }, { value: 'agent', label: 'Agent', icon: , path: '/agent', testId: 'tab-agent' }, ] const activeTab = tabs.find((tab) => location.pathname.startsWith(tab.path))?.value ?? 
'csvtickets' @@ -124,6 +127,7 @@ export default function App() { ))} } /> } /> + } /> } /> } /> diff --git a/frontend/src/features/usecase-demo/demoDefinitions.js b/frontend/src/features/usecase-demo/demoDefinitions.js index d6dac54..5020bd3 100644 --- a/frontend/src/features/usecase-demo/demoDefinitions.js +++ b/frontend/src/features/usecase-demo/demoDefinitions.js @@ -17,12 +17,13 @@ Nutze ausschließlich CSV-Daten und nenne die verwendeten Ticket-IDs in Fließte const SLA_BREACH_DEFAULT_PROMPT = `Call csv_sla_breach_tickets with default parameters (unassigned_only=true, include_ok=false). -Using the returned report, write ONLY a short markdown summary (max 200 words): -1. State the reference_timestamp used -2. Group ticket counts by breach_status and assigned_group -3. Recommend actions for the most critical breaches +Then provide only a concise markdown "next actions" commentary: +1. Mention the reference_timestamp used. +2. Identify highest-risk assigned groups (breached first, then at_risk). +3. Give concrete next actions for the next 30-60 minutes. +4. Add one short escalation recommendation for unresolved breached tickets. -Do NOT output a JSON block — the frontend fetches and renders the ticket table directly from the API.`; +Do NOT output a JSON block — the frontend already renders the SLA table from the API.`; /** * Add new demos here to create additional pages without duplicating UI logic. @@ -116,14 +117,13 @@ export const USECASE_DEMO_DEFINITIONS = [ defaultPrompt: SLA_BREACH_DEFAULT_PROMPT, runHistoryLimit: 25, pollIntervalMs: 2000, - resultViews: ["sla-breach", "markdown"], + resultViews: ["sla-breach", "sla-next-actions"], resultSectionTitle: "SLA Breach Results", resultSectionDescription: "Tickets at risk or already past their SLA threshold, sorted by severity.", ticketIdFields: ["ticket_ids", "ticket_id", "ticketIds"], - // Tickets are shown inline in the sla-breach result view; disable the separate card. 
matchingTickets: { - enabled: true, + enabled: false, title: "Affected Tickets (Group-Assigned, No Individual Assignee)", description: 'These tickets are routed to a support group but no individual has picked them up. The "Assigned Group" column shows the responsible team; "Assignee" is empty for all.', diff --git a/frontend/src/features/usecase-demo/resultViews.jsx b/frontend/src/features/usecase-demo/resultViews.jsx index eedd0bf..a23e860 100644 --- a/frontend/src/features/usecase-demo/resultViews.jsx +++ b/frontend/src/features/usecase-demo/resultViews.jsx @@ -1,21 +1,21 @@ import { - Badge, - Button, - Checkbox, - Spinner, - Text, - ToolbarButton, - Tooltip, - makeStyles, - tokens + Badge, + Button, + Checkbox, + Spinner, + Text, + ToolbarButton, + Tooltip, + makeStyles, + tokens } from '@fluentui/react-components' import { - ArrowUp24Regular, - CheckmarkCircle24Regular, - DismissCircle24Regular, - Mail24Regular, - SelectAllOn24Regular, - Warning24Regular, + ArrowUp24Regular, + CheckmarkCircle24Regular, + DismissCircle24Regular, + Mail24Regular, + SelectAllOn24Regular, + Warning24Regular, } from '@fluentui/react-icons' import { useCallback, useEffect, useMemo, useState } from 'react' import ReactMarkdown from 'react-markdown' @@ -387,6 +387,11 @@ export const RESULT_VIEW_REGISTRY = { description: 'Human-readable summary from the run output.', render: (props) => , }, + 'sla-next-actions': { + title: 'Next Actions', + description: 'Agent commentary on immediate follow-up actions based on SLA breach data.', + render: (props) => , + }, 'sla-breach': { title: 'SLA Breach Overview', description: 'Unassigned tickets color-coded by SLA status. 
Select tickets to send reminders or escalate.', diff --git a/frontend/src/features/workbench/WorkbenchPage.jsx b/frontend/src/features/workbench/WorkbenchPage.jsx new file mode 100644 index 0000000..7553384 --- /dev/null +++ b/frontend/src/features/workbench/WorkbenchPage.jsx @@ -0,0 +1,650 @@ +import { + Button, + Caption1, + Card, + Checkbox, + Field, + Input, + Spinner, + Subtitle1, + Text, + Textarea, + makeStyles, + tokens, +} from '@fluentui/react-components' +import { useEffect, useState } from 'react' +import ReactMarkdown from 'react-markdown' +import remarkGfm from 'remark-gfm' +import { + createWorkbenchAgent, + deleteWorkbenchAgent, + getWorkbenchUiConfig, + listWorkbenchAgents, + listWorkbenchTools, + runWorkbenchAgent, +} from '../../services/api' + +const useStyles = makeStyles({ + container: { + padding: tokens.spacingVerticalL, + display: 'flex', + flexDirection: 'column', + gap: tokens.spacingVerticalL, + }, + cardsGrid: { + display: 'grid', + gridTemplateColumns: '1fr 1fr', + gap: tokens.spacingHorizontalL, + alignItems: 'start', + }, + cardBody: { + display: 'flex', + flexDirection: 'column', + gap: tokens.spacingVerticalM, + }, + toolsList: { + display: 'flex', + flexDirection: 'column', + gap: tokens.spacingVerticalXS, + maxHeight: '260px', + overflowY: 'auto', + padding: `${tokens.spacingVerticalXS} 0`, + }, + tableWrapper: { + overflowX: 'auto', + backgroundColor: tokens.colorNeutralBackground1, + borderRadius: tokens.borderRadiusMedium, + boxShadow: tokens.shadow4, + }, + table: { + width: '100%', + borderCollapse: 'collapse', + fontSize: tokens.fontSizeBase200, + }, + th: { + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + textAlign: 'left', + backgroundColor: tokens.colorNeutralBackground3, + borderBottom: `2px solid ${tokens.colorNeutralStroke1}`, + whiteSpace: 'nowrap', + }, + td: { + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + borderBottom: `1px solid ${tokens.colorNeutralStroke2}`, + 
verticalAlign: 'top', + }, + empty: { + padding: tokens.spacingVerticalL, + }, + select: { + width: '100%', + border: `1px solid ${tokens.colorNeutralStroke1}`, + borderRadius: tokens.borderRadiusMedium, + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + backgroundColor: tokens.colorNeutralBackground1, + color: tokens.colorNeutralForeground1, + }, + runOutputMarkdown: { + border: `1px solid ${tokens.colorNeutralStroke1}`, + borderRadius: tokens.borderRadiusMedium, + padding: `${tokens.spacingVerticalS} ${tokens.spacingHorizontalM}`, + backgroundColor: tokens.colorNeutralBackground1, + maxHeight: '320px', + overflowY: 'auto', + '& h1, & h2, & h3': { + margin: `${tokens.spacingVerticalXS} 0`, + fontWeight: tokens.fontWeightSemibold, + }, + '& ul, & ol': { + margin: `${tokens.spacingVerticalXS} 0`, + paddingLeft: tokens.spacingHorizontalL, + }, + '& table': { + width: '100%', + borderCollapse: 'collapse', + marginTop: tokens.spacingVerticalXS, + }, + '& th, & td': { + border: `1px solid ${tokens.colorNeutralStroke1}`, + padding: tokens.spacingHorizontalXS, + textAlign: 'left', + }, + '& pre': { + backgroundColor: tokens.colorNeutralBackground3, + padding: tokens.spacingHorizontalM, + borderRadius: tokens.borderRadiusSmall, + overflowX: 'auto', + }, + '& code': { + fontFamily: 'monospace', + backgroundColor: tokens.colorNeutralBackground3, + padding: '0 4px', + borderRadius: tokens.borderRadiusSmall, + }, + '& a': { + color: tokens.colorBrandForegroundLink, + textDecoration: 'underline', + }, + }, +}) + +export default function WorkbenchPage() { + const styles = useStyles() + const [loading, setLoading] = useState(true) + const [error, setError] = useState('') + const [notice, setNotice] = useState('') + const [uiConfig, setUiConfig] = useState(null) + const [tools, setTools] = useState([]) + const [agents, setAgents] = useState([]) + const [submitting, setSubmitting] = useState(false) + const [formData, setFormData] = useState({ + name: '', + 
description: '', + systemPrompt: '', + requiresInput: false, + requiredInputDescription: '', + }) + const [fieldErrors, setFieldErrors] = useState({ + name: '', + systemPrompt: '', + tools: '', + requiredInputDescription: '', + }) + const [selectedToolNames, setSelectedToolNames] = useState([]) + const [runForm, setRunForm] = useState({ + agentId: '', + prompt: '', + requiredInputValue: '', + }) + const [runFieldErrors, setRunFieldErrors] = useState({ + agentId: '', + requiredInputValue: '', + }) + const [runError, setRunError] = useState('') + const [runOutput, setRunOutput] = useState('') + const [runButtonOutput, setRunButtonOutput] = useState('') + const [isRunningAgent, setIsRunningAgent] = useState(false) + const [runPulse, setRunPulse] = useState(0) + + const loadData = async () => { + setLoading(true) + setError('') + try { + const [configPayload, toolsPayload, agentsPayload] = await Promise.all([ + getWorkbenchUiConfig(), + listWorkbenchTools(), + listWorkbenchAgents(), + ]) + const nextTools = toolsPayload.tools || [] + const nextAgents = agentsPayload.agents || [] + + setUiConfig(configPayload) + setTools(nextTools) + setAgents(nextAgents) + setSelectedToolNames((prev) => { + const availableNames = nextTools.map((tool) => tool.name) + if (prev.length === 0) { + return availableNames + } + const filtered = prev.filter((name) => availableNames.includes(name)) + return filtered.length > 0 ? filtered : availableNames + }) + setRunForm((prev) => ({ + ...prev, + agentId: ( + prev.agentId && nextAgents.some((agent) => agent.id === prev.agentId) + ? 
prev.agentId + : (nextAgents[0]?.id || '') + ), + })) + } catch (err) { + setError(err?.message || 'Failed to load workbench data') + } finally { + setLoading(false) + } + } + + useEffect(() => { + loadData() + }, []) + + useEffect(() => { + if (!isRunningAgent) { + setRunPulse(0) + return undefined + } + + const timer = setInterval(() => { + setRunPulse((prev) => (prev + 1) % 4) + }, 350) + return () => clearInterval(timer) + }, [isRunningAgent]) + + const toggleTool = (toolName) => { + setSelectedToolNames((prev) => ( + prev.includes(toolName) + ? prev.filter((item) => item !== toolName) + : [...prev, toolName] + )) + setFieldErrors((prev) => ({ ...prev, tools: '' })) + } + + const selectedRunAgent = agents.find((agent) => agent.id === runForm.agentId) || null + + const validateForm = () => { + const nextErrors = { + name: '', + systemPrompt: '', + tools: '', + requiredInputDescription: '', + } + if (!formData.name.trim()) { + nextErrors.name = 'Agent name is required' + } + if (!formData.systemPrompt.trim()) { + nextErrors.systemPrompt = 'System prompt is required' + } + if (formData.requiresInput && !formData.requiredInputDescription.trim()) { + nextErrors.requiredInputDescription = 'Input description is required when input is required' + } + if (selectedToolNames.length === 0) { + nextErrors.tools = 'Select at least one tool' + } + setFieldErrors(nextErrors) + return !nextErrors.name && !nextErrors.systemPrompt && !nextErrors.tools && !nextErrors.requiredInputDescription + } + + const validateRunForm = () => { + const nextErrors = { + agentId: '', + requiredInputValue: '', + } + if (!runForm.agentId) { + nextErrors.agentId = 'Select an agent' + } + if (selectedRunAgent?.requires_input && !runForm.requiredInputValue.trim()) { + nextErrors.requiredInputValue = selectedRunAgent.required_input_description + ? 
`Required input is needed: ${selectedRunAgent.required_input_description}` + : 'Required input is needed for this agent' + } + setRunFieldErrors(nextErrors) + return !nextErrors.agentId && !nextErrors.requiredInputValue + } + + const handleCreateAgent = async () => { + setError('') + setNotice('') + if (!validateForm()) { + return + } + + setSubmitting(true) + try { + const createdAgent = await createWorkbenchAgent({ + name: formData.name.trim(), + description: formData.description.trim(), + system_prompt: formData.systemPrompt.trim(), + requires_input: formData.requiresInput, + required_input_description: formData.requiresInput + ? formData.requiredInputDescription.trim() + : '', + tool_names: selectedToolNames, + success_criteria: [], + }) + + const agentsPayload = await listWorkbenchAgents() + setAgents(agentsPayload.agents || []) + setFormData({ + name: '', + description: '', + systemPrompt: '', + requiresInput: false, + requiredInputDescription: '', + }) + setRunForm((prev) => ({ + ...prev, + agentId: createdAgent?.id || prev.agentId, + })) + setFieldErrors({ + name: '', + systemPrompt: '', + tools: '', + requiredInputDescription: '', + }) + setNotice('Agent created') + } catch (err) { + setError(err?.message || 'Failed to create agent') + } finally { + setSubmitting(false) + } + } + + const handleDeleteAgent = async (agentId) => { + setError('') + setNotice('') + try { + await deleteWorkbenchAgent(agentId) + setAgents((prev) => { + const nextAgents = prev.filter((agent) => agent.id !== agentId) + setRunForm((current) => ({ + ...current, + agentId: current.agentId === agentId ? 
(nextAgents[0]?.id || '') : current.agentId, + })) + return nextAgents + }) + setNotice('Agent deleted') + } catch (err) { + setError(err?.message || 'Failed to delete agent') + } + } + + const handleRunAgent = async () => { + setRunError('') + setRunOutput('') + setRunButtonOutput('') + setRunFieldErrors({ + agentId: '', + requiredInputValue: '', + }) + if (!validateRunForm()) { + return + } + + setIsRunningAgent(true) + try { + const run = await runWorkbenchAgent(runForm.agentId, { + inputPrompt: runForm.prompt.trim(), + requiredInputValue: runForm.requiredInputValue.trim(), + }) + const output = typeof run?.output === 'string' ? run.output : '' + setRunOutput(output || '(no output)') + + const preview = output.replace(/\s+/g, ' ').trim().slice(0, 90) + if (!preview) { + setRunButtonOutput('completed') + } else { + for (let index = 1; index <= preview.length; index += 3) { + setRunButtonOutput(preview.slice(0, index)) + // eslint-disable-next-line no-await-in-loop + await new Promise((resolve) => setTimeout(resolve, 14)) + } + } + } catch (err) { + setRunError(err?.message || 'Failed to run agent') + } finally { + setIsRunningAgent(false) + } + } + + const runButtonLabel = isRunningAgent + ? ( + runButtonOutput + ? `Running: ${runButtonOutput}` + : `Running${'.'.repeat(runPulse)}` + ) + : ( + runButtonOutput + ? `Last output: ${runButtonOutput}` + : 'Run Agent' + ) + + if (loading) { + return ( +
+ +
+ ) + } + + return ( +
+
+ Agent Fabric + + Minimal technical UI for agent definitions and lifecycle. + {' '} + Endpoints: + {' '} + {uiConfig?.endpoints?.length ?? 0} + +
+ + {error && {error}} + {notice && {notice}} + +
+ +
+ + { + setFormData((prev) => ({ ...prev, name: data.value })) + setFieldErrors((prev) => ({ ...prev, name: '' })) + }} + placeholder="e.g. CSV triage assistant" + aria-invalid={fieldErrors.name ? 'true' : 'false'} + /> + + {fieldErrors.name && {fieldErrors.name}} + + setFormData((prev) => ({ ...prev, description: data.value }))} + placeholder="optional" + /> + + { + const checked = Boolean(data.checked) + setFormData((prev) => ({ + ...prev, + requiresInput: checked, + requiredInputDescription: checked ? prev.requiredInputDescription : '', + })) + setFieldErrors((prev) => ({ ...prev, requiredInputDescription: '' })) + }} + /> + {formData.requiresInput && ( + <> + + { + setFormData((prev) => ({ ...prev, requiredInputDescription: data.value })) + setFieldErrors((prev) => ({ ...prev, requiredInputDescription: '' })) + }} + placeholder="e.g. Ticket INC number" + aria-invalid={fieldErrors.requiredInputDescription ? 'true' : 'false'} + /> + + {fieldErrors.requiredInputDescription && {fieldErrors.requiredInputDescription}} + + )} + +