# Partners/monitored_agent.py at main · DanielVisca/Partners · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Monitored Agent - Wrapper around Agent with API monitoring
"""
import sys
import time
sys.path.insert(0, 'agent')
from agent import Agent
from api_monitor import APIMonitor

class MonitoredAgent:
    """Wrap an ``Agent`` so that every call is reported to an ``APIMonitor``.

    Each ``run_sync`` call is timed and then recorded through
    ``monitor.log_call`` with either a ``"success"`` status or, when the
    agent raises, an ``"error"`` / ``"rate_limited"`` status.
    """

    # Rough characters-per-token ratio used to *estimate* token counts.
    # Real usage figures are not read from the agent here, so the numbers
    # that get logged are approximations derived from text length only.
    _CHARS_PER_TOKEN = 4

    def __init__(self, persona: str, model: str, monitor: APIMonitor):
        """Create the wrapped agent and remember the monitor to report to.

        Args:
            persona: Passed through as the first argument to ``Agent``.
            model: Passed to ``Agent`` as ``model=`` and attached to every
                logged call.
            monitor: Receives one ``log_call`` per ``run_sync`` invocation.
        """
        self.agent = Agent(persona, model=model)
        self.monitor = monitor
        self.model = model

    def run_sync(self, message: str):
        """Run the wrapped agent on ``message`` and log the outcome.

        Args:
            message: Prompt forwarded unchanged to ``self.agent.run_sync``.

        Returns:
            Whatever ``self.agent.run_sync(message)`` returns. Its length is
            taken with ``len()``, so it is assumed to be sized (presumably a
            string -- confirm against ``Agent``).

        Raises:
            Exception: Any exception raised by the agent is logged through
                the monitor and then re-raised unchanged.
        """
        # perf_counter is monotonic, so the measured latency cannot go
        # negative or jump if the system clock is adjusted mid-call.
        start = time.perf_counter()

        # Only the agent call lives inside the ``try``. If the success-path
        # logging below raised inside the same ``try``, the handler would log
        # the same call a second time and misreport it as a failure.
        try:
            result = self.agent.run_sync(message)
        except Exception as e:
            latency = time.perf_counter() - start
            error_msg = str(e)

            # Let the monitor decide whether the error text looks like a
            # rate limit; everything else is a plain error.
            if self.monitor.detect_rate_limit(error_msg):
                status = "rate_limited"
            else:
                status = "error"

            self.monitor.log_call(
                latency=latency,
                status=status,
                error=error_msg,
                model=self.model,
                prompt_length=len(message)
            )

            # Re-raise so the caller can handle it.
            raise

        latency = time.perf_counter() - start

        # Token usage is estimated from text length (see _CHARS_PER_TOKEN).
        prompt_length = len(message)
        response_length = len(result)
        prompt_tokens = prompt_length // self._CHARS_PER_TOKEN
        completion_tokens = response_length // self._CHARS_PER_TOKEN

        self.monitor.log_call(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
            latency=latency,
            status="success",
            model=self.model,
            prompt_length=prompt_length,
            response_length=response_length
        )

        return result