From f6154526605a456b339991c9976174f019807c1c Mon Sep 17 00:00:00 2001 From: Josh Park <50765702+JoshParkSJ@users.noreply.github.com> Date: Mon, 2 Mar 2026 12:24:16 -0500 Subject: [PATCH 1/2] handle voice job --- src/uipath/_cli/cli_run.py | 70 ++++++++++++++++++++++++++++++++ src/uipath/agent/models/agent.py | 1 + 2 files changed, 71 insertions(+) diff --git a/src/uipath/_cli/cli_run.py b/src/uipath/_cli/cli_run.py index 6b5152ed4..cd6d0ffc1 100644 --- a/src/uipath/_cli/cli_run.py +++ b/src/uipath/_cli/cli_run.py @@ -1,4 +1,5 @@ import asyncio +import logging import click @@ -32,6 +33,69 @@ from .middlewares import Middlewares console = ConsoleLogger() +logger = logging.getLogger(__name__) + + +async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None: + """Handle a voice agent job (config or toolCall). + + Voice bypasses the entire runtime/graph pipeline because + Gemini Live IS the agent loop — Python only supplies config and executes tools. + """ + from pathlib import Path + + from uipath_agents.voice.http import post_voice_config, post_voice_tool_response + from uipath_agents.voice.service import execute_voice_tool_call, get_voice_config + + from uipath.agent.utils import load_agent_definition + from uipath.runtime.result import UiPathRuntimeStatus + + agent_definition = load_agent_definition(Path(".")) + + if ctx.voice_mode == "config": + try: + config = await get_voice_config(agent_definition) + await post_voice_config(ctx, config) + except Exception as exc: + logger.exception("Voice config job failed") + try: + await post_voice_config(ctx, {"error": str(exc)}) + except Exception: + pass # best-effort error reporting + raise + + elif ctx.voice_mode == "toolCall": + input_data = ctx.get_input() or {} + tool_call = input_data.get("voiceToolCall", {}) + call_id = tool_call.get("callId", "") + tool_name = tool_call.get("toolName", "") + args = tool_call.get("args", {}) + + if not tool_name: + result_str, is_error = "Missing toolName in InputArguments", True + else: + result_str, is_error = await execute_voice_tool_call( + agent_definition, + tool_name, + args, + ) + + await post_voice_tool_response( + ctx, + { + "callId": call_id, + "result": result_str, + "isError": is_error, + }, + ) + + else: + raise RuntimeError( + f"Voice agent started with unknown voice.mode: {ctx.voice_mode!r}. " + "Expected 'config' or 'toolCall'." + ) + + ctx.result = UiPathRuntimeResult(status=UiPathRuntimeStatus.SUCCESSFUL) @click.command() @@ -182,6 +246,12 @@ async def execute() -> None: lambda: read_resource_overwrites_from_file(ctx.runtime_dir) ): with ctx: + # Voice detection — branch before runtime factory. + # CAS sets voice.mode via FpsProperties for voice jobs. + if ctx.voice_mode is not None: + await _handle_voice_job(ctx) + return + runtime: UiPathRuntimeProtocol | None = None chat_runtime: UiPathRuntimeProtocol | None = None factory: UiPathRuntimeFactoryProtocol | None = None diff --git a/src/uipath/agent/models/agent.py b/src/uipath/agent/models/agent.py index 5b412709d..bf89c6de0 100644 --- a/src/uipath/agent/models/agent.py +++ b/src/uipath/agent/models/agent.py @@ -1099,6 +1099,7 @@ class AgentSettings(BaseCfg): temperature: float byom_properties: Optional[AgentByomProperties] = Field(None, alias="byomProperties") max_iterations: Optional[int] = Field(None, alias="maxIterations") + persona: Optional[str] = Field(None, alias="persona") class AgentDefinition(BaseModel): From 82edf612cd762fa8aa5b25dcc796f347b861748c Mon Sep 17 00:00:00 2001 From: Josh Park <50765702+JoshParkSJ@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:45:35 -0500 Subject: [PATCH 2/2] support voice mode --- src/uipath/_cli/cli_run.py | 51 +++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/src/uipath/_cli/cli_run.py b/src/uipath/_cli/cli_run.py index cd6d0ffc1..566598e8e 100644 --- a/src/uipath/_cli/cli_run.py +++ b/src/uipath/_cli/cli_run.py @@ -39,13 +39,19 @@ async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None: """Handle a voice agent job (config or toolCall). - Voice bypasses the entire runtime/graph pipeline because + Voice bypasses the conversational runtime/graph pipeline because Gemini Live IS the agent loop — Python only supplies config and executes tools. + Tool calls run through a minimal stub graph so that ``interrupt()``-based + tools (process tools) can suspend/resume correctly. """ from pathlib import Path - from uipath_agents.voice.http import post_voice_config, post_voice_tool_response - from uipath_agents.voice.service import execute_voice_tool_call, get_voice_config + from uipath_agents.voice.http import post_voice_config, post_voice_tool_call + from uipath_agents.voice.service import ( + execute_voice_tool_call, + extract_tool_result, + get_voice_config, + ) from uipath.agent.utils import load_agent_definition from uipath.runtime.result import UiPathRuntimeStatus @@ -72,22 +78,33 @@ async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None: args = tool_call.get("args", {}) if not tool_name: - result_str, is_error = "Missing toolName in InputArguments", True - else: - result_str, is_error = await execute_voice_tool_call( - agent_definition, - tool_name, - args, + await post_voice_tool_call( + ctx, + {"callId": call_id, "result": "Missing toolName", "isError": True}, ) + else: + try: + result = await execute_voice_tool_call( + agent_definition, tool_name, args, ctx + ) - await post_voice_tool_response( - ctx, - { - "callId": call_id, - "result": result_str, - "isError": is_error, - }, - ) + if result.status == UiPathRuntimeStatus.SUSPENDED: + # Exit job as SUSPENDED — Orchestrator resumes when child process completes + ctx.result = result + return + + result_str, is_error = extract_tool_result(result) + await post_voice_tool_call( + ctx, + {"callId": call_id, "result": result_str, "isError": is_error}, + ) + except Exception as exc: + # Defense in depth: CAS always gets a response + logger.exception("Voice tool call failed unexpectedly") + await post_voice_tool_call( + ctx, + {"callId": call_id, "result": str(exc), "isError": True}, + ) else: raise RuntimeError(