Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 27 additions & 27 deletions src/uipath/eval/runtime/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def __init__(
f"eval_set_run_id={context.eval_set_run_id}"
)
self.execution_id = context.execution_id
logger.info(f"EVAL RUNTIME: execution_id set to: {self.execution_id}")
logger.debug(f"EVAL RUNTIME: execution_id set to: {self.execution_id}")
self.coverage = coverage.Coverage(branch=True)

self._storage: UiPathRuntimeStorageProtocol | None = None
Expand Down Expand Up @@ -304,12 +304,12 @@ async def initiate_evaluation(

async def execute(self) -> UiPathRuntimeResult:
"""Execute the evaluation runtime."""
logger.info("=" * 80)
logger.info("EVAL RUNTIME: Starting evaluation execution")
logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}")
logger.info(f"EVAL RUNTIME: Job ID: {self.context.job_id}")
logger.info(f"EVAL RUNTIME: Resume mode: {self.context.resume}")
logger.info("=" * 80)
logger.debug("=" * 80)
logger.debug("EVAL RUNTIME: Starting evaluation execution")
logger.debug(f"EVAL RUNTIME: Execution ID: {self.execution_id}")
logger.debug(f"EVAL RUNTIME: Job ID: {self.context.job_id}")
logger.debug(f"EVAL RUNTIME: Resume mode: {self.context.resume}")
logger.debug("=" * 80)

with self._mocker_cache():
tracer = self.trace_manager.tracer_provider.get_tracer(__name__)
Expand Down Expand Up @@ -396,8 +396,8 @@ async def execute(self) -> UiPathRuntimeResult:
)

# Collect triggers from all evaluation runs (pass-through from inner runtime)
logger.info("=" * 80)
logger.info(
logger.debug("=" * 80)
logger.debug(
"EVAL RUNTIME: Collecting triggers from all evaluation runs"
)
all_triggers = []
Expand All @@ -413,16 +413,16 @@ async def execute(self) -> UiPathRuntimeResult:
all_triggers.extend(runtime_result.triggers)

if all_triggers:
logger.info(
logger.debug(
f"EVAL RUNTIME: ✅ Passing through {len(all_triggers)} trigger(s) to top-level result"
)
for i, trigger in enumerate(all_triggers, 1):
logger.info(
logger.debug(
f"EVAL RUNTIME: Pass-through trigger {i}: {trigger.model_dump(by_alias=True)}"
)
Comment on lines 419 to 422
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These debug logs call `trigger.model_dump(by_alias=True)` inside an f-string. Python builds the f-string (and therefore runs `model_dump`) before `logger.debug` is even called, so the serialization cost is paid even when debug logging is disabled, which can add noticeable overhead if there are many triggers. Consider guarding the whole loop with `logger.isEnabledFor(logging.DEBUG)` so that `model_dump` only runs when the debug messages will actually be emitted.

Copilot uses AI. Check for mistakes.
else:
logger.info("EVAL RUNTIME: No triggers to pass through")
logger.info("=" * 80)
logger.debug("EVAL RUNTIME: No triggers to pass through")
logger.debug("=" * 80)

# Determine overall status - propagate status from inner runtime
# This is critical for serverless executor to know to save state and suspend job
Expand All @@ -438,7 +438,7 @@ async def execute(self) -> UiPathRuntimeResult:
)
if inner_status == UiPathRuntimeStatus.SUSPENDED:
overall_status = UiPathRuntimeStatus.SUSPENDED
logger.info(
logger.debug(
"EVAL RUNTIME: Propagating SUSPENDED status from inner runtime"
)
break # SUSPENDED takes highest priority, stop checking
Expand Down Expand Up @@ -534,10 +534,10 @@ async def _execute_eval(
eval_set_run_id=self.context.eval_set_run_id,
)

logger.info(
logger.debug(
f"DEBUG: Agent execution result status: {agent_execution_output.result.status}"
)
logger.info(
logger.debug(
f"DEBUG: Agent execution result trigger: {agent_execution_output.result.trigger}"
)
Comment on lines +537 to 542
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that these are emitted at debug level, the message prefix "DEBUG:" is redundant and makes log filtering/grepping noisier. Consider removing the "DEBUG:" prefix from these messages and keeping the rest of the context in the log text.

Copilot uses AI. Check for mistakes.

Expand Down Expand Up @@ -583,11 +583,11 @@ async def _execute_eval(
):
# For suspended executions, we don't run evaluators yet
# The serverless executor should save the triggers and resume later
logger.info("=" * 80)
logger.info(
logger.debug("=" * 80)
logger.debug(
f"🔴 EVAL RUNTIME: DETECTED SUSPENSION for eval '{eval_item.name}' (id: {eval_item.id})"
)
logger.info("EVAL RUNTIME: Agent returned SUSPENDED status")
logger.debug("EVAL RUNTIME: Agent returned SUSPENDED status")

# Extract triggers from result
triggers = []
Expand All @@ -596,15 +596,15 @@ async def _execute_eval(
if agent_execution_output.result.triggers:
triggers.extend(agent_execution_output.result.triggers)

logger.info(
logger.debug(
f"EVAL RUNTIME: Extracted {len(triggers)} trigger(s) from suspended execution"
)
for i, trigger in enumerate(triggers, 1):
logger.info(
logger.debug(
f"EVAL RUNTIME: Trigger {i}: {trigger.model_dump(by_alias=True)}"
)
Comment on lines +603 to 605
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue here: trigger.model_dump(by_alias=True) is computed unconditionally inside an f-string, even if debug logging is off. Please gate the model_dump behind a debug-level check to avoid unnecessary serialization work in normal runs.

Suggested change
- logger.debug(
-     f"EVAL RUNTIME: Trigger {i}: {trigger.model_dump(by_alias=True)}"
- )
+ if logger.isEnabledFor(logging.DEBUG):
+     trigger_dump = trigger.model_dump(by_alias=True)
+     logger.debug(
+         "EVAL RUNTIME: Trigger %s: %s",
+         i,
+         trigger_dump,
+     )

Copilot uses AI. Check for mistakes.

logger.info("=" * 80)
logger.debug("=" * 80)

# IMPORTANT: Always include execution output with triggers when suspended
# This ensures triggers are visible in the output JSON for serverless executor
Expand All @@ -618,7 +618,7 @@ async def _execute_eval(
# The evalRun should remain in IN_PROGRESS state until the agent completes
# and evaluators run. When the execution resumes, the evaluators will run
# and the evalRun will be properly updated with results.
logger.info(
logger.debug(
"EVAL RUNTIME: Skipping evalRun update - keeping status as IN_PROGRESS until resume"
)

Expand Down Expand Up @@ -862,7 +862,7 @@ async def execute_runtime(
# 3. Build resume map: {interrupt_id: resume_data}
# 4. Pass this map to the delegate runtime
if self.context.resume:
logger.info(f"Resuming evaluation {eval_item.id}")
logger.debug(f"Resuming evaluation {eval_item.id}")
input = input_overrides if self.context.job_id is None else None
else:
input = inputs_with_overrides
Expand All @@ -876,7 +876,7 @@ async def execute_runtime(

# Log suspend status if applicable
if result.status == UiPathRuntimeStatus.SUSPENDED:
logger.info(f"Evaluation {eval_item.id} suspended")
logger.debug(f"Evaluation {eval_item.id} suspended")

except Exception as e:
end_time = time()
Expand Down Expand Up @@ -1042,7 +1042,7 @@ async def _restore_parent_span(
trace_flags=TraceFlags(0x01), # Sampled
)
parent_span = NonRecordingSpan(span_context)
logger.info(
logger.debug(
f"EVAL RUNTIME: Restored {span_type} span context for resume - "
f"trace_id={saved_context['trace_id']}, span_id={saved_context['span_id']}"
)
Expand Down Expand Up @@ -1083,7 +1083,7 @@ async def _save_span_context_for_resume(
},
)

logger.info(
logger.debug(
f"EVAL RUNTIME: Saved {span_type} span context for resume - "
f"trace_id={trace_id_hex}, span_id={span_id_hex}"
)
Expand Down