Skip to content

Commit ea6ea87

Browse files
feat: Add telemetry
1 parent 6db99ef commit ea6ea87

6 files changed

Lines changed: 458 additions & 29 deletions

File tree

confidence/confidence.py

Lines changed: 68 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import requests
2020
import httpx
2121
from typing_extensions import TypeGuard
22+
import time
2223

2324
from confidence import __version__
2425
from confidence.errors import (
@@ -30,6 +31,7 @@
3031
)
3132
from .flag_types import FlagResolutionDetails, Reason, ErrorCode
3233
from .names import FlagName, VariantName
34+
from .telemetry import Telemetry, ProtoTraceId, ProtoStatus
3335

3436
EU_RESOLVE_API_ENDPOINT = "https://resolver.eu.confidence.dev"
3537
US_RESOLVE_API_ENDPOINT = "https://resolver.us.confidence.dev"
@@ -111,6 +113,16 @@ def __init__(
111113
self.async_client = async_client
112114
self._setup_logger(logger)
113115
self._custom_resolve_base_url = custom_resolve_base_url
116+
self._telemetry = Telemetry(__version__)
117+
118+
def _get_resolve_headers(self) -> Dict[str, str]:
119+
telemetry_header = self._telemetry.get_monitoring_header()
120+
headers = {
121+
"Content-Type": "application/json",
122+
"Accept": "application/json",
123+
"X-CONFIDENCE-TELEMETRY": telemetry_header
124+
}
125+
return headers
114126

115127
def resolve_boolean_details(
116128
self, flag_key: str, default_value: bool
@@ -367,7 +379,6 @@ def _send_event_internal(self, event_name: str, data: Dict[str, FieldType]) -> N
367379
)
368380
if response.status_code == 200:
369381
json = response.json()
370-
371382
json_errors = json.get("errors")
372383
if json_errors:
373384
self.logger.warning("events emitted with errors:")
@@ -407,37 +418,55 @@ def _handle_resolve_response(
407418
def _resolve(
408419
self, flag_name: FlagName, context: Dict[str, FieldType]
409420
) -> ResolveResult:
410-
request_body = {
411-
"clientSecret": self._client_secret,
412-
"evaluationContext": context,
413-
"apply": self._apply_on_resolve,
414-
"flags": [str(flag_name)],
415-
"sdk": {"id": "SDK_ID_PYTHON_CONFIDENCE", "version": __version__},
416-
}
417-
base_url = self._api_endpoint
418-
if self._custom_resolve_base_url is not None:
419-
base_url = self._custom_resolve_base_url
420-
421-
resolve_url = f"{base_url}/v1/flags:resolve"
422-
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0
421+
start_time = time.perf_counter()
423422
try:
424-
response = requests.post(
425-
resolve_url, json=request_body, timeout=timeout_sec
426-
)
427-
return self._handle_resolve_response(response, flag_name)
428-
except requests.exceptions.Timeout:
429-
self.logger.warning(
430-
f"Request timed out after {timeout_sec}s"
431-
f" when resolving flag {flag_name}"
432-
)
433-
raise TimeoutError()
434-
except requests.exceptions.RequestException as e:
435-
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
436-
raise GeneralError(str(e))
423+
request_body = {
424+
"clientSecret": self._client_secret,
425+
"evaluationContext": context,
426+
"apply": self._apply_on_resolve,
427+
"flags": [str(flag_name)],
428+
"sdk": {"id": "SDK_ID_PYTHON_CONFIDENCE", "version": __version__},
429+
}
430+
base_url = self._api_endpoint
431+
if self._custom_resolve_base_url is not None:
432+
base_url = self._custom_resolve_base_url
433+
434+
resolve_url = f"{base_url}/v1/flags:resolve"
435+
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0
436+
437+
try:
438+
response = requests.post(
439+
resolve_url,
440+
json=request_body,
441+
headers=self._get_resolve_headers(),
442+
timeout=timeout_sec
443+
)
444+
445+
result = self._handle_resolve_response(response, flag_name)
446+
duration_ms = int((time.perf_counter() - start_time) * 1000)
447+
self._telemetry.add_trace(ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY, duration_ms, ProtoStatus.PROTO_STATUS_SUCCESS)
448+
return result
449+
except requests.exceptions.Timeout:
450+
duration_ms = int((time.perf_counter() - start_time) * 1000)
451+
self._telemetry.add_trace(ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY, duration_ms, ProtoStatus.PROTO_STATUS_TIMEOUT)
452+
self.logger.warning(
453+
f"Request timed out after {timeout_sec}s"
454+
f" when resolving flag {flag_name}"
455+
)
456+
raise TimeoutError()
457+
except requests.exceptions.RequestException as e:
458+
duration_ms = int((time.perf_counter() - start_time) * 1000)
459+
self._telemetry.add_trace(ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY, duration_ms, ProtoStatus.PROTO_STATUS_ERROR)
460+
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
461+
raise GeneralError(str(e))
462+
except Exception as e:
463+
# Just re-raise any other exceptions without adding another trace
464+
raise e
437465

438466
async def _resolve_async(
439467
self, flag_name: FlagName, context: Dict[str, FieldType]
440468
) -> ResolveResult:
469+
start_time = time.perf_counter()
441470
request_body = {
442471
"clientSecret": self._client_secret,
443472
"evaluationContext": context,
@@ -453,16 +482,26 @@ async def _resolve_async(
453482
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0
454483
try:
455484
response = await self.async_client.post(
456-
resolve_url, json=request_body, timeout=timeout_sec
485+
resolve_url,
486+
json=request_body,
487+
headers=self._get_resolve_headers(),
488+
timeout=timeout_sec
457489
)
458-
return self._handle_resolve_response(response, flag_name)
490+
result = self._handle_resolve_response(response, flag_name)
491+
duration_ms = int((time.perf_counter() - start_time) * 1000)
492+
self._telemetry.add_trace(ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY, duration_ms, ProtoStatus.PROTO_STATUS_SUCCESS)
493+
return result
459494
except httpx.TimeoutException:
495+
duration_ms = int((time.perf_counter() - start_time) * 1000)
496+
self._telemetry.add_trace(ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY, duration_ms, ProtoStatus.PROTO_STATUS_TIMEOUT)
460497
self.logger.warning(
461498
f"Request timed out after {timeout_sec}s"
462499
f" when resolving flag {flag_name}"
463500
)
464501
raise TimeoutError()
465502
except httpx.HTTPError as e:
503+
duration_ms = int((time.perf_counter() - start_time) * 1000)
504+
self._telemetry.add_trace(ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY, duration_ms, ProtoStatus.PROTO_STATUS_ERROR)
466505
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
467506
raise GeneralError(str(e))
468507

confidence/telemetry.proto

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
syntax = "proto3";
2+
3+
package confidence.telemetry.v1;
4+
5+
option go_package = "github.com/spotify/confidence-sdk-go/pkg/confidence";
6+
7+
enum ProtoPlatform {
8+
PROTO_PLATFORM_UNSPECIFIED = 0;
9+
PROTO_PLATFORM_JS_WEB = 4;
10+
PROTO_PLATFORM_JS_SERVER = 5;
11+
PROTO_PLATFORM_PYTHON = 6;
12+
PROTO_PLATFORM_GO = 7;
13+
}
14+
15+
message ProtoMonitoring {
16+
repeated ProtoLibraryTraces library_traces = 1;
17+
ProtoPlatform platform = 2;
18+
}
19+
20+
message ProtoLibraryTraces {
21+
ProtoLibrary library = 1;
22+
string library_version = 2;
23+
repeated ProtoTrace traces = 3;
24+
25+
message ProtoTrace {
26+
ProtoTraceId id = 1;
27+
28+
// DEPRECATED
29+
optional uint64 millisecond_duration = 2;
30+
31+
oneof trace {
32+
ProtoRequestTrace request_trace = 3;
33+
ProtoCountTrace count_trace = 4;
34+
}
35+
36+
message ProtoCountTrace {}
37+
38+
message ProtoRequestTrace {
39+
uint64 millisecond_duration = 1;
40+
ProtoStatus status = 2;
41+
42+
enum ProtoStatus {
43+
PROTO_STATUS_UNSPECIFIED = 0;
44+
PROTO_STATUS_SUCCESS = 1;
45+
PROTO_STATUS_ERROR = 2;
46+
PROTO_STATUS_TIMEOUT = 3;
47+
PROTO_STATUS_CACHED = 4;
48+
}
49+
}
50+
}
51+
52+
enum ProtoLibrary {
53+
PROTO_LIBRARY_UNSPECIFIED = 0;
54+
PROTO_LIBRARY_CONFIDENCE = 1;
55+
PROTO_LIBRARY_OPEN_FEATURE = 2;
56+
PROTO_LIBRARY_REACT = 3;
57+
}
58+
59+
enum ProtoTraceId {
60+
PROTO_TRACE_ID_UNSPECIFIED = 0;
61+
PROTO_TRACE_ID_RESOLVE_LATENCY = 1;
62+
PROTO_TRACE_ID_STALE_FLAG = 2;
63+
PROTO_TRACE_ID_FLAG_TYPE_MISMATCH = 3;
64+
PROTO_TRACE_ID_WITH_CONTEXT = 4;
65+
}
66+
}

confidence/telemetry.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import time
2+
from typing import List
3+
import base64
4+
from queue import Queue
5+
from confidence.telemetry_pb2 import (
6+
ProtoMonitoring,
7+
ProtoLibraryTraces,
8+
ProtoPlatform,
9+
)
10+
11+
# Get the nested classes from ProtoLibraryTraces
12+
ProtoTrace = ProtoLibraryTraces.ProtoTrace
13+
ProtoLibrary = ProtoLibraryTraces.ProtoLibrary
14+
ProtoTraceId = ProtoLibraryTraces.ProtoTraceId
15+
ProtoStatus = ProtoLibraryTraces.ProtoTrace.ProtoRequestTrace.ProtoStatus
16+
17+
class Telemetry:
18+
_instance = None
19+
_initialized = False
20+
21+
def __new__(cls, version: str):
22+
if cls._instance is None:
23+
cls._instance = super(Telemetry, cls).__new__(cls)
24+
return cls._instance
25+
26+
def __init__(self, version: str):
27+
if not self._initialized:
28+
self.version = version
29+
self._traces_queue = Queue()
30+
self._initialized = True
31+
32+
def add_trace(self, trace_id: ProtoTraceId, duration_ms: int, status: ProtoStatus) -> None:
33+
trace = ProtoTrace()
34+
trace.id = trace_id
35+
trace.millisecond_duration = duration_ms
36+
request_trace = ProtoTrace.ProtoRequestTrace()
37+
request_trace.status = status
38+
trace.request_trace.CopyFrom(request_trace)
39+
self._traces_queue.put(trace)
40+
41+
def get_monitoring_header(self) -> str:
42+
# Get all current traces atomically
43+
current_traces = []
44+
while not self._traces_queue.empty():
45+
try:
46+
current_traces.append(self._traces_queue.get_nowait())
47+
except:
48+
break
49+
50+
# Create monitoring data with the captured traces
51+
monitoring = ProtoMonitoring()
52+
library_traces = monitoring.library_traces.add()
53+
library_traces.library = ProtoLibrary.PROTO_LIBRARY_CONFIDENCE
54+
library_traces.library_version = self.version
55+
library_traces.traces.extend(current_traces)
56+
monitoring.platform = ProtoPlatform.PROTO_PLATFORM_PYTHON
57+
58+
# Serialize to protobuf and base64 encode
59+
serialized = monitoring.SerializeToString()
60+
encoded = base64.b64encode(serialized).decode()
61+
return encoded

confidence/telemetry_pb2.py

Lines changed: 53 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

generate_proto.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import subprocess
4+
import sys
5+
6+
def generate_proto():
7+
proto_file = "confidence/telemetry.proto"
8+
output_dir = "confidence"
9+
10+
# Check if protoc is installed
11+
try:
12+
version = subprocess.check_output(["protoc", "--version"]).decode().strip()
13+
print(f"Found protoc version: {version}")
14+
except FileNotFoundError:
15+
print("Error: protoc compiler not found. Please install it first.")
16+
print("You can install it via:")
17+
print(" - macOS: brew install protobuf")
18+
print(" - Linux: apt-get install protobuf-compiler")
19+
print(" - Windows: Download from https://github.com/protocolbuffers/protobuf/releases")
20+
sys.exit(1)
21+
22+
# Generate Python code
23+
cmd = [
24+
"protoc",
25+
f"--python_out={output_dir}",
26+
f"--proto_path={os.path.dirname(proto_file)}",
27+
proto_file
28+
]
29+
30+
print(f"Generating Python code from {proto_file}...")
31+
try:
32+
subprocess.check_call(cmd)
33+
output_file = os.path.join(output_dir, os.path.basename(os.path.splitext(proto_file)[0]) + "_pb2.py")
34+
print(f"Successfully generated {output_file}")
35+
except subprocess.CalledProcessError as e:
36+
print(f"Error generating proto code: {e}")
37+
sys.exit(1)
38+
39+
if __name__ == "__main__":
40+
generate_proto()

0 commit comments

Comments
 (0)