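Simplify AI span config: replace system/operation/extra_static with a "static" dict and a per-call span_data argument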

shellmayr · shellmayr · commit c7a1b581a36c · 2026-03-05T10:25:11.000+01:00
diff --git a/sentry_sdk/ai/span_config.py b/sentry_sdk/ai/span_config.py
@@ -14,44 +14,39 @@
     from sentry_sdk.tracing import Span
 
 
-def set_input_span_data(span, kwargs, integration, config):
-    # type: (Span, Dict[str, Any], Any, Dict[str, Any]) -> None
+def set_input_span_data(span, kwargs, integration, config, span_data=None):
+    # type: (Span, Dict[str, Any], Any, Dict[str, Any], Dict[str, Any] | None) -> None
     """
     Set input span data from a declarative config.
 
     Config keys:
-        system: str - gen_ai.system value
-        operation: str - gen_ai.operation.name value
+        static: dict - key/value pairs to set unconditionally
         params: dict - kwargs key -> span attr (always set if present)
         pii_params: dict - kwargs key -> span attr (only when PII allowed)
         extract_messages: callable(kwargs) -> list or None
         message_target: str - span attr for messages (default: GEN_AI_REQUEST_MESSAGES)
-        truncation_fn: callable or None - truncation function (default: truncate_and_annotate_messages, None to skip)
-        is_given: callable(value) -> bool - for NotGiven sentinels
-        extra_static: dict - additional key/value pairs to set
+
+    span_data: optional per-call key/value pairs, set unconditionally after "static"
     """
-    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, config["system"])
-    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, config["operation"])
+    for key, value in config.get("static", {}).items():
+        set_data_normalized(span, key, value)
+    if span_data:
+        for key, value in span_data.items():
+            set_data_normalized(span, key, value)
 
-    is_given = config.get("is_given")
     for kwarg_key, span_attr in config.get("params", {}).items():
         if kwarg_key in kwargs:
             value = kwargs[kwarg_key]
-            if is_given is None or is_given(value):
-                set_data_normalized(span, span_attr, value)
+            set_data_normalized(span, span_attr, value)
 
     if should_send_default_pii() and integration.include_prompts:
         extract = config.get("extract_messages")
         if extract is not None:
             messages = extract(kwargs)
             if messages:
                 messages = normalize_message_roles(messages)
-                truncation_fn = config.get(
-                    "truncation_fn", truncate_and_annotate_messages
-                )
-                if truncation_fn is not None:
-                    scope = sentry_sdk.get_current_scope()
-                    messages = truncation_fn(messages, span, scope)
+                scope = sentry_sdk.get_current_scope()
+                messages = truncate_and_annotate_messages(messages, span, scope)
                 if messages is not None:
                     target = config.get(
                         "message_target", SPANDATA.GEN_AI_REQUEST_MESSAGES
@@ -61,8 +56,4 @@ def set_input_span_data(span, kwargs, integration, config):
         for kwarg_key, span_attr in config.get("pii_params", {}).items():
             if kwarg_key in kwargs:
                 value = kwargs[kwarg_key]
-                if is_given is None or is_given(value):
-                    set_data_normalized(span, span_attr, value)
-
-    for key, value in config.get("extra_static", {}).items():
-        set_data_normalized(span, key, value)
+                set_data_normalized(span, span_attr, value)
diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
@@ -503,7 +503,8 @@ def normalize_message_role(role: str) -> str:
     Normalize a message role to one of the 4 allowed gen_ai role values.
     Maps "ai" -> "assistant" and keeps other standard roles unchanged.
     """
-    return GEN_AI_MESSAGE_ROLE_MAPPING.get(role, role)
+    role_lower = role.lower()
+    return GEN_AI_MESSAGE_ROLE_MAPPING.get(role_lower, role_lower)
 
 
 def normalize_message_roles(messages: "list[dict[str, Any]]") -> "list[dict[str, Any]]":
diff --git a/sentry_sdk/integrations/cohere/__init__.py b/sentry_sdk/integrations/cohere/__init__.py
@@ -32,8 +32,10 @@ def _normalize_embedding_input(texts):
 
 
 COHERE_EMBED_CONFIG = {
-    "system": "cohere",
-    "operation": "embeddings",
+    "static": {
+        SPANDATA.GEN_AI_SYSTEM: "cohere",
+        SPANDATA.GEN_AI_OPERATION_NAME: "embeddings",
+    },
     "params": {"model": SPANDATA.GEN_AI_REQUEST_MODEL},
     "extract_messages": lambda kw: (
         _normalize_embedding_input(kw["texts"]) if "texts" in kw else None
diff --git a/sentry_sdk/integrations/cohere/utils.py b/sentry_sdk/integrations/cohere/utils.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Any
+    from typing import Any, Mapping, Sequence
 
 
 def transitive_getattr(obj, *attrs):
@@ -17,7 +17,7 @@ def transitive_getattr(obj, *attrs):
 
 
 def get_first_from_sources(obj, source_paths, require_truthy=False):
-    # type: (Any, list[tuple[str, ...]], bool) -> Any
+    # type: (Any, Sequence[tuple[str, ...]], bool) -> Any
     for source_path in source_paths:
         value = transitive_getattr(obj, *source_path)
         if not value:
@@ -28,7 +28,7 @@ def get_first_from_sources(obj, source_paths, require_truthy=False):
 
 
 def set_span_data_from_sources(span, obj, target_sources, require_truthy):
-    # type: (Any, Any, dict[str, list[tuple[str, ...]]], bool) -> None
+    # type: (Any, Any, Mapping[str, Sequence[tuple[str, ...]]], bool) -> None
     for spandata_key, source_paths in target_sources.items():
         value = get_first_from_sources(obj, source_paths, require_truthy=require_truthy)
         if value is not None:
diff --git a/sentry_sdk/integrations/cohere/v1.py b/sentry_sdk/integrations/cohere/v1.py
@@ -38,6 +38,14 @@
 except ImportError:
     _has_chat_types = False
 
+COHERE_V1_CHAT_CONFIG = {
+    "static": {
+        SPANDATA.GEN_AI_SYSTEM: "cohere",
+        SPANDATA.GEN_AI_OPERATION_NAME: "chat",
+    },
+    "extract_messages": lambda kw: _extract_messages(kw),
+}
+
 CHAT_RESPONSE_SOURCES = {
     SPANDATA.GEN_AI_RESPONSE_ID: [("generation_id",)],
     SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS: [("finish_reason",)],
@@ -105,38 +113,31 @@ def new_chat(*args, **kwargs):
                 reraise(*exc_info)
 
             with capture_internal_exceptions():
+                span_data = {SPANDATA.GEN_AI_RESPONSE_STREAMING: streaming}
                 if model:
-                    set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
+                    span_data[SPANDATA.GEN_AI_REQUEST_MODEL] = model
                 set_input_span_data(
-                    span,
-                    kwargs,
-                    integration,
-                    {
-                        "system": "cohere",
-                        "operation": "chat",
-                        "extract_messages": _extract_messages_v1,
-                        "extra_static": {SPANDATA.GEN_AI_RESPONSE_STREAMING: streaming},
-                    },
+                    span, kwargs, integration, COHERE_V1_CHAT_CONFIG, span_data
                 )
 
                 if streaming:
-                    return _iter_v1_stream_events(res, span, include_pii)
+                    return _iter_stream_events(res, span, include_pii)
                 if isinstance(res, NonStreamedChatResponse):
-                    _collect_v1_response_fields(span, res, include_pii=include_pii)
+                    _collect_response_fields(span, res, include_pii=include_pii)
                 else:
                     set_data_normalized(span, "unknown_response", True)
                 return res
 
     return new_chat
 
 
-def _extract_messages_v1(kwargs):
+def _extract_messages(kwargs):
     # type: (dict[str, Any]) -> list[dict[str, str]]
     messages = []
     for x in kwargs.get("chat_history", []):
         messages.append(
             {
-                "role": getattr(x, "role", "").lower(),
+                "role": getattr(x, "role", ""),
                 "content": transform_message_content(getattr(x, "message", "")),
             }
         )
@@ -146,7 +147,7 @@ def _extract_messages_v1(kwargs):
     return messages
 
 
-def _iter_v1_stream_events(old_iterator, span, include_pii):
+def _iter_stream_events(old_iterator, span, include_pii):
     # type: (Any, Any, bool) -> Iterator[StreamedChatResponse]
     with capture_internal_exceptions():
         for x in old_iterator:
@@ -161,10 +162,10 @@ def _collect_v1_stream_end_fields(span, event, include_pii):
     # type: (Any, Any, bool) -> None
     response = get_first_from_sources(event, STREAM_RESPONSE_SOURCES)
     if response is not None:
-        _collect_v1_response_fields(span, response, include_pii)
+        _collect_response_fields(span, response, include_pii)
 
 
-def _collect_v1_response_fields(span, response, include_pii):
+def _collect_response_fields(span, response, include_pii):
     # type: (Any, Any, bool) -> None
     if include_pii:
         text = get_first_from_sources(response, CHAT_RESPONSE_TEXT_SOURCES)
diff --git a/sentry_sdk/integrations/cohere/v2.py b/sentry_sdk/integrations/cohere/v2.py
@@ -46,6 +46,17 @@
 except ImportError:
     _has_v2 = False
 
+COHERE_V2_CHAT_CONFIG = {
+    "static": {
+        SPANDATA.GEN_AI_SYSTEM: "cohere",
+        SPANDATA.GEN_AI_OPERATION_NAME: "chat",
+    },
+    "pii_params": {
+        "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS,
+    },
+    "extract_messages": lambda kw: _extract_messages_v2(kw.get("messages", [])),
+}
+
 CHAT_RESPONSE_SOURCES = {
     SPANDATA.GEN_AI_RESPONSE_ID: [("id",)],
     SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS: [("finish_reason",)],
@@ -55,7 +66,7 @@
 }
 CHAT_USAGE_SOURCES = [("usage",)]
 STREAM_DELTA_TEXT_SOURCES = [("delta", "message", "content", "text")]
-STREAM_CHAT_RESPONSE_SOURCES = {
+STREAM_CHAT_RESPONSE_SOURCES: "dict[str, list[tuple[str, ...]]]" = {
     SPANDATA.GEN_AI_RESPONSE_ID: [("id",)],
     SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS: [("delta", "finish_reason")],
 }
@@ -104,29 +115,17 @@ def new_chat(*args, **kwargs):
                 reraise(*exc_info)
 
             with capture_internal_exceptions():
-                extra = {SPANDATA.GEN_AI_RESPONSE_STREAMING: streaming}
-                if model:
-                    set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
-                    extra[SPANDATA.GEN_AI_RESPONSE_MODEL] = model
+                span_data = {
+                    SPANDATA.GEN_AI_RESPONSE_STREAMING: streaming,
+                    SPANDATA.GEN_AI_REQUEST_MODEL: model if model else None,
+                    SPANDATA.GEN_AI_RESPONSE_MODEL: model if model else None,
+                }
                 set_input_span_data(
-                    span,
-                    kwargs,
-                    integration,
-                    {
-                        "system": "cohere",
-                        "operation": "chat",
-                        "pii_params": {
-                            "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS,
-                        },
-                        "extract_messages": lambda kw: _extract_messages_v2(
-                            kw.get("messages", [])
-                        ),
-                        "extra_static": extra,
-                    },
+                    span, kwargs, integration, COHERE_V2_CHAT_CONFIG, span_data
                 )
                 if streaming:
                     return _iter_v2_stream_events(res, span, include_pii)
-                _collect_v2_response_fields(span, res, include_pii=include_pii)
+                _collect_v2_response_fields(span, res, include_pii)
                 return res
 
     return new_chat
@@ -146,7 +145,7 @@ def _extract_messages_v2(messages):
 
 def _iter_v2_stream_events(old_iterator, span, include_pii):
     # type: (Any, Span, bool) -> Iterator[V2ChatStreamResponse]
-    collected_text = []
+    collected_text = []  # type: list[str]
     with capture_internal_exceptions():
         for x in old_iterator:
             _append_stream_delta_text(collected_text, x)