|
53 | 53 | DtmfSent, |
54 | 54 | EndCall, |
55 | 55 | Interruption, |
| 56 | + LlmAvailabilityChanged, |
56 | 57 | PlayCompleted, |
| 58 | + SessionUsage, |
57 | 59 | ToolCall, |
| 60 | + ToolExecuted, |
58 | 61 | TurnCompleted, |
59 | 62 | TurnMetrics, |
| 63 | + UserBackchannel, |
60 | 64 | UserIdle, |
61 | 65 | UserStateChanged, |
62 | 66 | VoiceApp, |
@@ -271,8 +275,8 @@ async def init_agent(): |
271 | 275 | # "uncertain_turn_delay_ms": 800, # delay when uncertain |
272 | 276 | # "min_interruption_duration_ms": 300, # sustained speech before barge-in |
273 | 277 | # "false_interruption_timeout_ms": 800, # PAUSE -> COMMIT/RESUME wait |
274 | | - # "completed_turn_threshold": 0.7, # EOU probability for complete |
275 | | - # "incomplete_turn_threshold": 0.3, # EOU probability for incomplete |
| 278 | + # "completed_turn_threshold": 0.7, # turn detection probability for complete |
| 279 | + # "incomplete_turn_threshold": 0.3, # turn detection probability for incomplete |
276 | 280 | # } |
277 | 281 | semantic_vad="high", |
278 | 282 |
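For intuition, here is an illustrative sketch -- not SDK code -- of how the commented thresholds above could map a turn-detection probability to an endpointing delay. Every constant except the 800ms uncertain delay is an assumed value:

```python
# Illustrative only, not part of the SDK: translate a turn-detection
# probability into a wait-before-responding delay using the thresholds
# from the commented config above. Delays other than the 800ms uncertain
# delay are assumptions.
def endpointing_delay_ms(
    probability: float,
    completed_turn_threshold: float = 0.7,
    incomplete_turn_threshold: float = 0.3,
    complete_delay_ms: int = 100,        # assumed: user likely finished, reply fast
    uncertain_turn_delay_ms: int = 800,  # matches the commented config above
    incomplete_delay_ms: int = 2000,     # assumed: user likely mid-thought, wait
) -> int:
    if probability >= completed_turn_threshold:
        return complete_delay_ms
    if probability <= incomplete_turn_threshold:
        return incomplete_delay_ms
    return uncertain_turn_delay_ms
```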
|
@@ -591,21 +595,44 @@ def on_user_idle(session, event: UserIdle): |
591 | 595 |
592 | 596 | @app.on("turn.metrics") |
593 | 597 | def on_metrics(session, event: TurnMetrics): |
594 | | - """Per-turn latency metrics (opt-in via metrics_events=True).""" |
595 | | - print( |
596 | | - f" Metrics [turn {event.turn_number}]: " |
597 | | - f"perceived={event.user_perceived_ms}ms " |
598 | | - f"stt={event.stt_delay_ms}ms " |
599 | | - f"llm_ttft={event.llm_ttft_ms}ms " |
600 | | - f"tts={event.tts_pipeline_ms}ms " |
601 | | - f"method={event.turn_method}" |
602 | | - ) |
| 598 | + """Per-turn latency metrics -- comprehensive pipeline observability. |
| 599 | +
| 600 | + Covers all pipeline metrics classes: |
| 601 | + - LLMMetrics (13 fields), STTMetrics (9), TTSMetrics (14), VADMetrics (5), |
| 602 | + Turn detection (6), InterruptionMetrics (8), RealtimeModelMetrics (15), |
| 603 | + ChatMessage.metrics (8 SDK-measured fields). |
| 604 | + """ |
| 605 | + parts = [ |
| 606 | + f"perceived={event.user_perceived_ms}ms", |
| 607 | + f"stt={event.stt_delay_ms}ms", |
| 608 | + f"turn={event.turn_decision_ms}ms", |
| 609 | + f"llm_ttft={event.llm_ttft_ms}ms", |
| 610 | + f"tts_ttfb={event.tts_ttfb_ms}ms", |
| 611 | + f"method={event.turn_method}", |
| 612 | + ] |
| 613 | + if event.llm_tokens_per_second: |
| 614 | + parts.append(f"tok/s={event.llm_tokens_per_second}") |
| 615 | + if event.llm_cache_hit_ratio: |
| 616 | + parts.append(f"cache={event.llm_cache_hit_ratio}") |
| 617 | + if event.endpointing_min_delay_ms is not None: |
| 618 | + parts.append(f"ep_min={event.endpointing_min_delay_ms}ms") |
| 619 | + parts.append(f"ep_max={event.endpointing_max_delay_ms}ms") |
| 620 | + if event.llm_cancelled: |
| 621 | + parts.append("llm_cancelled") |
| 622 | + if event.tts_cancelled: |
| 623 | + parts.append("tts_cancelled") |
| 624 | + if event.num_interruptions: |
| 625 | + parts.append(f"interruptions={event.num_interruptions}") |
| 626 | + if event.num_backchannels: |
| 627 | + parts.append(f"backchannels={event.num_backchannels}") |
| 628 | + print(f" Metrics [turn {event.turn_number}]: {' '.join(parts)}") |
603 | 629 |
604 | 630 |
605 | 631 | @app.on("turn.completed") |
606 | 632 | def on_turn(session, event: TurnCompleted): |
607 | | - print(f" User: {event.user_text}") |
608 | | - print(f" Agent: {event.agent_text}") |
| 633 | + prefix = "[agent-first] " if event.agent_first else "" |
| 634 | + print(f" {prefix}User: {event.user_text}") |
| 635 | + print(f" {prefix}Agent: {event.agent_text}") |
609 | 636 |
610 | 637 |
611 | 638 | @app.on("user.dtmf") |
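As a usage sketch for the turn.metrics handler above: the per-turn fields it prints can also be folded into rolling statistics for a dashboard. LatencyStats is a hypothetical helper, not an SDK class:

```python
# Hypothetical helper, not SDK code: accumulate TurnMetrics fields from the
# handler above into per-field medians over the session.
from collections import defaultdict
from statistics import median


class LatencyStats:
    FIELDS = ("user_perceived_ms", "stt_delay_ms", "turn_decision_ms",
              "llm_ttft_ms", "tts_ttfb_ms")

    def __init__(self) -> None:
        self._samples = defaultdict(list)

    def record(self, event) -> None:
        # Skip fields the event does not carry for this turn.
        for name in self.FIELDS:
            value = getattr(event, name, None)
            if value is not None:
                self._samples[name].append(value)

    def summary(self) -> dict:
        return {name: median(values) for name, values in self._samples.items()}
```

Calling stats.record(event) inside on_metrics and logging stats.summary() when the session ends yields per-field medians without keeping raw transcripts.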
@@ -674,6 +701,83 @@ def on_false_interruption(session, event): |
674 | 701 | print(" False interruption -- agent resumed") |
675 | 702 |
676 | 703 |
| 704 | +@app.on("tool.executed") |
| 705 | +def on_tool_executed(session, event: ToolExecuted): |
| 706 | + """Tool call results -- shows what tools were called and their outputs.""" |
| 707 | + for call in event.calls: |
| 708 | + output = call.get("output", "") |
| 709 | + is_error = call.get("is_error", False) |
| 710 | + status = "ERROR" if is_error else "ok" |
| 711 | + print( |
| 712 | + f" Tool executed: {call.get('name', '?')}({call.get('arguments', '')}) "
| 713 | + f"[{status}] {output[:100]}" |
| 714 | + ) |
| 715 | + |
| 716 | + |
| 717 | +@app.on("user.backchannel") |
| 718 | +def on_backchannel(session, event: UserBackchannel): |
| 719 | + """Overlapping speech detected during agent playback (adaptive mode only). |
| 720 | +
| 721 | + is_interruption=True means the user is genuinely interrupting. |
| 722 | + is_interruption=False means a backchannel (e.g., "uh-huh", "yeah").
| 723 | + """ |
| 724 | + label = "INTERRUPTION" if event.is_interruption else "backchannel" |
| 725 | + print( |
| 726 | + f" Backchannel: {label} " |
| 727 | + f"(prob={event.probability}, delay={event.detection_delay_ms}ms)" |
| 728 | + ) |
| 729 | + |
| 730 | + |
| 731 | +@app.on("session.usage") |
| 732 | +def on_usage(session, event: SessionUsage): |
| 733 | + """Cumulative session usage -- full per-model breakdown for billing/cost tracking. |
| 734 | +
| 735 | + Each model entry contains ALL fields from model_dump().
| 736 | + LLM: input_tokens, input_cached_tokens, output_tokens, session_duration, etc. |
| 737 | + TTS: characters_count, audio_duration, input/output_tokens, etc. |
| 738 | + STT: audio_duration, input/output_tokens, etc. |
| 739 | + Interruption: total_requests. |
| 740 | + """ |
| 741 | + if not event.models: |
| 742 | + return |
| 743 | + parts = [] |
| 744 | + for m in event.models: |
| 745 | + t = m.get("type", "") |
| 746 | + provider = m.get("provider", "?") |
| 747 | + model = m.get("model", "?") |
| 748 | + if t == "llm_usage": |
| 749 | + cached = m.get("input_cached_tokens", 0) |
| 750 | + parts.append( |
| 751 | + f"LLM({provider}/{model}): " |
| 752 | + f"{m.get('input_tokens', 0)}in/{m.get('output_tokens', 0)}out " |
| 753 | + f"cached={cached}" |
| 754 | + ) |
| 755 | + elif t == "tts_usage": |
| 756 | + parts.append( |
| 757 | + f"TTS({provider}/{model}): " |
| 758 | + f"{m.get('characters_count', 0)} chars, " |
| 759 | + f"{m.get('audio_duration', 0):.1f}s audio" |
| 760 | + ) |
| 761 | + elif t == "stt_usage": |
| 762 | + parts.append( |
| 763 | + f"STT({provider}/{model}): " |
| 764 | + f"{m.get('audio_duration', 0):.1f}s audio" |
| 765 | + ) |
| 766 | + elif t == "interruption_usage": |
| 767 | + parts.append( |
| 768 | + f"Interruption({provider}): {m.get('total_requests', 0)} reqs" |
| 769 | + ) |
| 770 | + if parts: |
| 771 | + print(f" Usage: {' | '.join(parts)}") |
| 772 | + |
| 773 | + |
| 774 | +@app.on("llm.availability_changed") |
| 775 | +def on_llm_availability(session, event: LlmAvailabilityChanged): |
| 776 | + """LLM fallback provider went up/down -- useful for monitoring fallback health.""" |
| 777 | + status = "available" if event.available else "UNAVAILABLE" |
| 778 | + print(f" LLM availability: {event.llm} -> {status}") |
| 779 | + |
| 780 | + |
677 | 781 | @app.on("session.error") |
678 | 782 | def on_error(session, event): |
679 | 783 | print(f" Error [{event.code}]: {event.message}") |
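Building on the session.usage payload handled above, the same per-model entries support a rough cost estimate. All rates below are placeholder assumptions, not real provider pricing:

```python
# Rough cost sketch over the session.usage entries above. Every rate is a
# placeholder -- substitute your providers' actual pricing.
LLM_USD_PER_1K_INPUT = 0.0005    # assumed rate
LLM_USD_PER_1K_OUTPUT = 0.0015   # assumed rate
TTS_USD_PER_1K_CHARS = 0.015     # assumed rate
STT_USD_PER_AUDIO_MIN = 0.006    # assumed rate


def estimate_cost_usd(models: list[dict]) -> float:
    """Sum an approximate dollar cost across per-model usage entries."""
    total = 0.0
    for m in models:
        kind = m.get("type", "")
        if kind == "llm_usage":
            total += m.get("input_tokens", 0) / 1000 * LLM_USD_PER_1K_INPUT
            total += m.get("output_tokens", 0) / 1000 * LLM_USD_PER_1K_OUTPUT
        elif kind == "tts_usage":
            total += m.get("characters_count", 0) / 1000 * TTS_USD_PER_1K_CHARS
        elif kind == "stt_usage":
            total += m.get("audio_duration", 0) / 60 * STT_USD_PER_AUDIO_MIN
    return total
```

Calling estimate_cost_usd(event.models) inside on_usage gives a running cost figure alongside the usage log line.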
|