-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi_monitor.py
More file actions
118 lines (104 loc) · 4.23 KB
/
api_monitor.py
File metadata and controls
118 lines (104 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""
API Monitor - Track API responses, rate limits, and errors
"""
import json
import time
from pathlib import Path
from typing import Any, Dict, Optional
from datetime import datetime
class APIMonitor:
    """Monitor API calls for rate limits, errors, and token usage.

    Each logged call is appended as one JSON object per line (JSONL) to
    ``log_path``; aggregate counters are kept in memory and summarized
    by :meth:`get_stats` / :meth:`print_stats`.
    """

    def __init__(self, log_path: Optional[str] = None):
        """Initialize counters and the JSONL log destination.

        Args:
            log_path: Path to the JSONL log file. Defaults to
                ``api_monitor.jsonl`` in the current working directory.
        """
        self.log_path = Path(log_path or "api_monitor.jsonl")
        self.call_count = 0        # total calls logged
        self.error_count = 0       # calls with status == "error"
        self.rate_limit_count = 0  # calls with status == "rate_limited"
        self.total_tokens = 0      # sum of total_tokens across calls
        self.start_time = time.time()  # monotonicity not required; used for rough rates

    def log_call(
        self,
        prompt_tokens: int = 0,
        completion_tokens: int = 0,
        total_tokens: int = 0,
        latency: float = 0,
        status: str = "success",
        error: Optional[str] = None,
        model: str = "",
        **extra,
    ) -> Dict[str, Any]:
        """Log an API call with all relevant metadata.

        Updates the in-memory counters, appends one JSON line to the log
        file, and returns the entry that was written.

        Args:
            prompt_tokens: Tokens in the request prompt.
            completion_tokens: Tokens in the model's response.
            total_tokens: Total tokens billed for the call.
            latency: Wall-clock duration of the call, in seconds
                (rounded to 2 decimals in the log).
            status: ``"success"``, ``"error"``, or ``"rate_limited"``.
            error: Error message, if any.
            model: Model identifier used for the call.
            **extra: Additional fields merged into the log entry
                (may override the standard keys if names collide).

        Returns:
            The dict that was serialized to the log.
        """
        self.call_count += 1
        self.total_tokens += total_tokens
        if status == "error":
            self.error_count += 1
        if status == "rate_limited":
            self.rate_limit_count += 1
        entry = {
            "timestamp": datetime.now().isoformat(),  # local time, naive
            "call_number": self.call_count,
            "model": model,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "latency": round(latency, 2),
            "status": status,
            "error": error,
            **extra,
        }
        # Append to the JSONL log. Explicit UTF-8 so non-ASCII error
        # messages don't raise/mojibake where the platform default
        # encoding is not UTF-8 (e.g. Windows cp1252).
        with open(self.log_path, 'a', encoding='utf-8') as f:
            f.write(json.dumps(entry) + '\n')
        return entry

    def detect_rate_limit(self, error_msg: str) -> bool:
        """Return True if *error_msg* looks like a rate-limit error.

        Case-insensitive substring match against common provider
        phrasings ("429", "too many requests", quota messages, ...).
        """
        rate_limit_indicators = (
            "rate_limit",
            "429",
            "too many requests",
            "quota exceeded",
            "rate limit exceeded",
        )
        msg = error_msg.lower()  # lowercase once, not per indicator
        return any(indicator in msg for indicator in rate_limit_indicators)

    def get_stats(self) -> Dict[str, Any]:
        """Return summary statistics since this monitor was created.

        All rate figures guard against division by zero (no calls yet,
        or zero elapsed time).
        """
        elapsed = time.time() - self.start_time
        return {
            "total_calls": self.call_count,
            "errors": self.error_count,
            "rate_limits": self.rate_limit_count,
            "total_tokens": self.total_tokens,
            "elapsed_seconds": round(elapsed, 1),
            "calls_per_minute": round(self.call_count / (elapsed / 60), 2) if elapsed > 0 else 0,
            "tokens_per_second": round(self.total_tokens / elapsed, 2) if elapsed > 0 else 0,
            "avg_tokens_per_call": round(self.total_tokens / self.call_count, 0) if self.call_count > 0 else 0,
        }

    def print_stats(self) -> None:
        """Print a formatted statistics summary to stdout."""
        stats = self.get_stats()
        print("\n" + "=" * 70)
        print("📊 API MONITOR STATS")
        print("=" * 70)
        print(f"Total calls: {stats['total_calls']}")
        # Guard the percentage explicitly instead of a ternary wrapping
        # the whole print() call — same output, far easier to read.
        if stats['total_calls'] > 0:
            error_pct = stats['errors'] / stats['total_calls'] * 100
            print(f"Errors: {stats['errors']} ({error_pct:.1f}%)")
        else:
            print("Errors: 0")
        print(f"Rate limits: {stats['rate_limits']}")
        print(f"Total tokens: {stats['total_tokens']:,}")
        print(f"Elapsed: {stats['elapsed_seconds']:.1f}s")
        print(f"Calls/minute: {stats['calls_per_minute']}")
        print(f"Tokens/second: {stats['tokens_per_second']}")
        print(f"Avg tokens/call: {stats['avg_tokens_per_call']:,.0f}")
        print("=" * 70)

    def tail_log(self, n: int = 10) -> None:
        """Print the last *n* log entries to stdout.

        Best-effort display: malformed or incomplete lines (e.g. a
        truncated final line from an interrupted append) are skipped
        with a notice instead of raising.
        """
        if not self.log_path.exists():
            print("No log file yet")
            return
        with open(self.log_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        print(f"\n📝 Last {min(n, len(lines))} API calls:")
        for line in lines[-n:]:
            try:
                entry = json.loads(line)
                status = entry["status"]
            except (json.JSONDecodeError, KeyError):
                print("⚠️  (skipping malformed log line)")
                continue
            status_emoji = "✅" if status == "success" else "❌"
            print(f"{status_emoji} {entry.get('timestamp')} | {entry.get('model')} | "
                  f"{entry.get('total_tokens')} tokens | {entry.get('latency')}s | {status}")
            if entry.get('error'):
                print(f"   Error: {entry['error']}")