-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmonitored_agent.py
More file actions
65 lines (54 loc) · 2.13 KB
/
monitored_agent.py
File metadata and controls
65 lines (54 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Monitored Agent - Wrapper around Agent with API monitoring
"""
import sys
import time
sys.path.insert(0, 'agent')
from agent import Agent
from api_monitor import APIMonitor
class MonitoredAgent:
    """Agent wrapper that reports every ``run_sync`` call to an API monitor.

    Each call is timed and then recorded through ``monitor.log_call`` with
    either a ``"success"`` status plus estimated token counts, or an
    ``"error"`` / ``"rate_limited"`` status plus the error text.
    """

    def __init__(self, persona: str, model: str, monitor: APIMonitor):
        """Build the wrapped agent and remember where to report calls.

        Args:
            persona: Passed positionally to ``Agent``.
            model: Passed to ``Agent`` as ``model=`` and attached to every
                logged call.
            monitor: Object providing ``log_call(**fields)`` and
                ``detect_rate_limit(error_message)``.
        """
        self.agent = Agent(persona, model=model)
        self.monitor = monitor
        self.model = model

    def run_sync(self, message: str):
        """Run the wrapped agent on ``message`` and log the outcome.

        Args:
            message: Prompt text forwarded unchanged to ``agent.run_sync``.

        Returns:
            Whatever ``agent.run_sync`` returns. ``len()`` is applied to it
            for the size estimate, so it must be sized (presumably a ``str``
            -- confirm against ``Agent``).

        Raises:
            Exception: Any exception raised by ``agent.run_sync`` is logged
                and then re-raised unchanged.
        """
        # perf_counter is monotonic, so the measured latency cannot be
        # skewed (or go negative) if the system wall clock is adjusted.
        start = time.perf_counter()

        # Only the agent call is guarded: a failure in the success-path
        # bookkeeping below must not be logged as if the API call had failed.
        try:
            result = self.agent.run_sync(message)
        except Exception as e:
            latency = time.perf_counter() - start
            error_msg = str(e)

            # Let the monitor decide whether the error text looks like a
            # rate-limit response.
            if self.monitor.detect_rate_limit(error_msg):
                status = "rate_limited"
            else:
                status = "error"

            self.monitor.log_call(
                latency=latency,
                status=status,
                error=error_msg,
                model=self.model,
                prompt_length=len(message),
            )

            # Re-raise so the caller can handle it.
            raise

        latency = time.perf_counter() - start

        # Real token usage is not read from the underlying library here;
        # counts are a rough estimate of about four characters per token.
        prompt_tokens = len(message) // 4
        completion_tokens = len(result) // 4
        total_tokens = prompt_tokens + completion_tokens

        self.monitor.log_call(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            latency=latency,
            status="success",
            model=self.model,
            prompt_length=len(message),
            response_length=len(result),
        )
        return result