From f6154526605a456b339991c9976174f019807c1c Mon Sep 17 00:00:00 2001
From: Josh Park <50765702+JoshParkSJ@users.noreply.github.com>
Date: Mon, 2 Mar 2026 12:24:16 -0500
Subject: [PATCH 1/2] handle voice job

---
 src/uipath/_cli/cli_run.py       | 70 ++++++++++++++++++++++++++++++++
 src/uipath/agent/models/agent.py |  1 +
 2 files changed, 71 insertions(+)

diff --git a/src/uipath/_cli/cli_run.py b/src/uipath/_cli/cli_run.py
index 6b5152ed4..cd6d0ffc1 100644
--- a/src/uipath/_cli/cli_run.py
+++ b/src/uipath/_cli/cli_run.py
@@ -1,4 +1,5 @@
 import asyncio
+import logging
 
 import click
 
@@ -32,6 +33,69 @@
 from .middlewares import Middlewares
 
 console = ConsoleLogger()
+logger = logging.getLogger(__name__)
+
+
+async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None:
+    """Handle a voice agent job (config or toolCall).
+
+    Voice bypasses the entire runtime/graph pipeline because
+    Gemini Live IS the agent loop — Python only supplies config and executes tools.
+    """
+    from pathlib import Path
+
+    from uipath_agents.voice.http import post_voice_config, post_voice_tool_response
+    from uipath_agents.voice.service import execute_voice_tool_call, get_voice_config
+
+    from uipath.agent.utils import load_agent_definition
+    from uipath.runtime.result import UiPathRuntimeStatus
+
+    agent_definition = load_agent_definition(Path("."))
+
+    if ctx.voice_mode == "config":
+        try:
+            config = await get_voice_config(agent_definition)
+            await post_voice_config(ctx, config)
+        except Exception as exc:
+            logger.exception("Voice config job failed")
+            try:
+                await post_voice_config(ctx, {"error": str(exc)})
+            except Exception:
+                logger.warning("Voice error post failed", exc_info=True)  # best-effort
+            raise
+
+    elif ctx.voice_mode == "toolCall":
+        input_data = ctx.get_input() or {}
+        tool_call = input_data.get("voiceToolCall") or {}  # tolerate explicit null
+        call_id = tool_call.get("callId", "")
+        tool_name = tool_call.get("toolName", "")
+        args = tool_call.get("args", {})
+
+        if not tool_name:
+            result_str, is_error = "Missing toolName in InputArguments", True
+        else:
+            result_str, is_error = await execute_voice_tool_call(
+                agent_definition,
+                tool_name,
+                args,
+            )
+
+        await post_voice_tool_response(
+            ctx,
+            {
+                "callId": call_id,
+                "result": result_str,
+                "isError": is_error,
+            },
+        )
+
+    else:
+        raise RuntimeError(
+            f"Voice agent started with unknown voice.mode: {ctx.voice_mode!r}. "
+            "Expected 'config' or 'toolCall'."
+        )
+
+    ctx.result = UiPathRuntimeResult(status=UiPathRuntimeStatus.SUCCESSFUL)
 
 
 @click.command()
@@ -182,6 +246,12 @@ async def execute() -> None:
                     lambda: read_resource_overwrites_from_file(ctx.runtime_dir)
                 ):
                     with ctx:
+                        # Voice detection — branch before runtime factory.
+                        # CAS sets voice.mode via FpsProperties for voice jobs.
+                        if ctx.voice_mode is not None:
+                            await _handle_voice_job(ctx)
+                            return
+
                         runtime: UiPathRuntimeProtocol | None = None
                         chat_runtime: UiPathRuntimeProtocol | None = None
                         factory: UiPathRuntimeFactoryProtocol | None = None
diff --git a/src/uipath/agent/models/agent.py b/src/uipath/agent/models/agent.py
index 5b412709d..bf89c6de0 100644
--- a/src/uipath/agent/models/agent.py
+++ b/src/uipath/agent/models/agent.py
@@ -1099,6 +1099,7 @@ class AgentSettings(BaseCfg):
     temperature: float
     byom_properties: Optional[AgentByomProperties] = Field(None, alias="byomProperties")
     max_iterations: Optional[int] = Field(None, alias="maxIterations")
+    persona: Optional[str] = Field(None, alias="persona")
 
 
 class AgentDefinition(BaseModel):

From 82edf612cd762fa8aa5b25dcc796f347b861748c Mon Sep 17 00:00:00 2001
From: Josh Park <50765702+JoshParkSJ@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:45:35 -0500
Subject: [PATCH 2/2] support voice mode

---
 src/uipath/_cli/cli_run.py | 51 +++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/src/uipath/_cli/cli_run.py b/src/uipath/_cli/cli_run.py
index cd6d0ffc1..566598e8e 100644
--- a/src/uipath/_cli/cli_run.py
+++ b/src/uipath/_cli/cli_run.py
@@ -39,13 +39,19 @@
 async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None:
     """Handle a voice agent job (config or toolCall).
 
-    Voice bypasses the entire runtime/graph pipeline because
+    Voice bypasses the conversational runtime/graph pipeline because
     Gemini Live IS the agent loop — Python only supplies config and executes tools.
+    Tool calls run through a minimal stub graph so that ``interrupt()``-based
+    tools (process tools) can suspend/resume correctly.
     """
     from pathlib import Path
 
-    from uipath_agents.voice.http import post_voice_config, post_voice_tool_response
-    from uipath_agents.voice.service import execute_voice_tool_call, get_voice_config
+    from uipath_agents.voice.http import post_voice_config, post_voice_tool_call
+    from uipath_agents.voice.service import (
+        execute_voice_tool_call,
+        extract_tool_result,
+        get_voice_config,
+    )
 
     from uipath.agent.utils import load_agent_definition
     from uipath.runtime.result import UiPathRuntimeStatus
@@ -72,22 +78,33 @@ async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None:
         args = tool_call.get("args", {})
 
         if not tool_name:
-            result_str, is_error = "Missing toolName in InputArguments", True
-        else:
-            result_str, is_error = await execute_voice_tool_call(
-                agent_definition,
-                tool_name,
-                args,
+            await post_voice_tool_call(
+                ctx,
+                {"callId": call_id, "result": "Missing toolName", "isError": True},
             )
+        else:
+            try:
+                result = await execute_voice_tool_call(
+                    agent_definition, tool_name, args, ctx
+                )
 
-        await post_voice_tool_response(
-            ctx,
-            {
-                "callId": call_id,
-                "result": result_str,
-                "isError": is_error,
-            },
-        )
+                if result.status == UiPathRuntimeStatus.SUSPENDED:
+                    # Exit job as SUSPENDED — Orchestrator resumes when child process completes
+                    ctx.result = result
+                    return
+
+                result_str, is_error = extract_tool_result(result)
+                await post_voice_tool_call(
+                    ctx,
+                    {"callId": call_id, "result": result_str, "isError": is_error},
+                )
+            except Exception as exc:
+                # Defense in depth: CAS always gets a response
+                logger.exception("Voice tool call failed unexpectedly")
+                await post_voice_tool_call(
+                    ctx,
+                    {"callId": call_id, "result": str(exc), "isError": True},
+                )
 
     else:
         raise RuntimeError(