Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions src/uipath/_cli/cli_run.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import logging

import click

Expand Down Expand Up @@ -32,6 +33,86 @@
from .middlewares import Middlewares

# Module-level ConsoleLogger instance.
console = ConsoleLogger()
# Standard-library logger named after this module; _handle_voice_job uses it
# to record exceptions raised while serving voice jobs.
logger = logging.getLogger(__name__)


async def _handle_voice_job(ctx: UiPathRuntimeContext) -> None:
    """Handle a voice agent job (config or toolCall).

    Voice bypasses the conversational runtime/graph pipeline because
    Gemini Live IS the agent loop — Python only supplies config and executes tools.
    Tool calls run through a minimal stub graph so that ``interrupt()``-based
    tools (process tools) can suspend/resume correctly.

    Args:
        ctx: Runtime context of the job. ``ctx.voice_mode`` selects the branch,
            ``ctx.get_input()`` supplies the tool-call payload, and
            ``ctx.result`` receives the final runtime result.

    Raises:
        RuntimeError: If ``ctx.voice_mode`` is neither ``"config"`` nor
            ``"toolCall"``.
        Exception: Any failure while building or posting the voice config is
            re-raised after a best-effort attempt to report it.
    """
    from pathlib import Path

    from uipath_agents.voice.http import post_voice_config, post_voice_tool_call
    from uipath_agents.voice.service import (
        execute_voice_tool_call,
        extract_tool_result,
        get_voice_config,
    )

    from uipath.agent.utils import load_agent_definition
    from uipath.runtime.result import UiPathRuntimeStatus

    agent_definition = load_agent_definition(Path("."))

    if ctx.voice_mode == "config":
        try:
            config = await get_voice_config(agent_definition)
            await post_voice_config(ctx, config)
        except Exception as exc:
            logger.exception("Voice config job failed")
            try:
                await post_voice_config(ctx, {"error": str(exc)})
            except Exception:
                # Best-effort error reporting: the original failure is what
                # gets re-raised below, but leave a trace of the failed report
                # instead of dropping it silently.
                logger.warning(
                    "Failed to report voice config error", exc_info=True
                )
            raise

    elif ctx.voice_mode == "toolCall":
        input_data = ctx.get_input() or {}
        tool_call = input_data.get("voiceToolCall")
        if not isinstance(tool_call, dict):
            # A missing, null or non-object payload is answered through the
            # "Missing toolName" path below rather than crashing here, before
            # any response has been posted.
            tool_call = {}
        call_id = tool_call.get("callId", "")
        tool_name = tool_call.get("toolName", "")
        args = tool_call.get("args", {})
        if args is None:
            # An explicit null means "no arguments".
            args = {}

        if not tool_name:
            await post_voice_tool_call(
                ctx,
                {"callId": call_id, "result": "Missing toolName", "isError": True},
            )
        else:
            try:
                result = await execute_voice_tool_call(
                    agent_definition, tool_name, args, ctx
                )

                if result.status == UiPathRuntimeStatus.SUSPENDED:
                    # Exit job as SUSPENDED — Orchestrator resumes when child process completes
                    ctx.result = result
                    return

                result_str, is_error = extract_tool_result(result)
                await post_voice_tool_call(
                    ctx,
                    {"callId": call_id, "result": result_str, "isError": is_error},
                )
            except Exception as exc:
                # Defense in depth: CAS always gets a response
                logger.exception("Voice tool call failed unexpectedly")
                await post_voice_tool_call(
                    ctx,
                    {"callId": call_id, "result": str(exc), "isError": True},
                )

    else:
        raise RuntimeError(
            f"Voice agent started with unknown voice.mode: {ctx.voice_mode!r}. "
            "Expected 'config' or 'toolCall'."
        )

    # Reached on every non-suspended, non-raising path, including tool calls
    # whose failure was reported via isError.
    ctx.result = UiPathRuntimeResult(status=UiPathRuntimeStatus.SUCCESSFUL)


@click.command()
Expand Down Expand Up @@ -182,6 +263,12 @@ async def execute() -> None:
lambda: read_resource_overwrites_from_file(ctx.runtime_dir)
):
with ctx:
# Voice detection — branch before runtime factory.
# CAS sets voice.mode via FpsProperties for voice jobs.
if ctx.voice_mode is not None:
await _handle_voice_job(ctx)
return

runtime: UiPathRuntimeProtocol | None = None
chat_runtime: UiPathRuntimeProtocol | None = None
factory: UiPathRuntimeFactoryProtocol | None = None
Expand Down
1 change: 1 addition & 0 deletions src/uipath/agent/models/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,7 @@ class AgentSettings(BaseCfg):
temperature: float
byom_properties: Optional[AgentByomProperties] = Field(None, alias="byomProperties")
max_iterations: Optional[int] = Field(None, alias="maxIterations")
persona: Optional[str] = Field(None, alias="persona")


class AgentDefinition(BaseModel):
Expand Down
Loading