Skip to content

Commit 9ab4ddd

Browse files
ref(ai): Revert binary blob truncation
1 parent 070efed commit 9ab4ddd

3 files changed

Lines changed: 0 additions & 382 deletions

File tree

sentry_sdk/_types.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313

1414
SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"
15-
BLOB_DATA_SUBSTITUTE = "[Blob substitute]"
1615

1716

1817
class AnnotatedValue:

sentry_sdk/ai/utils.py

Lines changed: 0 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
from copy import deepcopy
44
from typing import TYPE_CHECKING
55

6-
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
7-
from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX
86

97
if TYPE_CHECKING:
108
from typing import Any, Callable, Dict, List, Optional, Tuple
@@ -198,104 +196,6 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
198196
return 0
199197

200198

201-
def _is_image_type_with_blob_content(item: "Dict[str, Any]") -> bool:
202-
"""
203-
Some content blocks contain an image_url property with base64 content as its value.
204-
This is used to identify those while not leading to unnecessary copying of data when the image URL does not contain base64 content.
205-
"""
206-
if item.get("type") != "image_url":
207-
return False
208-
209-
image_url = item.get("image_url", {}).get("url", "")
210-
data_url_match = DATA_URL_BASE64_REGEX.match(image_url)
211-
212-
return bool(data_url_match)
213-
214-
215-
def redact_blob_message_parts(
216-
messages: "List[Dict[str, Any]]",
217-
) -> "List[Dict[str, Any]]":
218-
"""
219-
Redact blob message parts from the messages by replacing blob content with "[Blob substitute]".
220-
221-
This function creates a deep copy of messages that contain blob content to avoid
222-
mutating the original message dictionaries. Messages without blob content are
223-
returned as-is to minimize copying overhead.
224-
225-
e.g.:
226-
{
227-
"role": "user",
228-
"content": [
229-
{
230-
"text": "How many ponies do you see in the image?",
231-
"type": "text"
232-
},
233-
{
234-
"type": "blob",
235-
"modality": "image",
236-
"mime_type": "image/jpeg",
237-
"content": "data:image/jpeg;base64,..."
238-
}
239-
]
240-
}
241-
becomes:
242-
{
243-
"role": "user",
244-
"content": [
245-
{
246-
"text": "How many ponies do you see in the image?",
247-
"type": "text"
248-
},
249-
{
250-
"type": "blob",
251-
"modality": "image",
252-
"mime_type": "image/jpeg",
253-
"content": "[Blob substitute]"
254-
}
255-
]
256-
}
257-
"""
258-
259-
# First pass: check if any message contains blob content
260-
has_blobs = False
261-
for message in messages:
262-
if not isinstance(message, dict):
263-
continue
264-
content = message.get("content")
265-
if isinstance(content, list):
266-
for item in content:
267-
if isinstance(item, dict) and (
268-
item.get("type") == "blob" or _is_image_type_with_blob_content(item)
269-
):
270-
has_blobs = True
271-
break
272-
if has_blobs:
273-
break
274-
275-
# If no blobs found, return original messages to avoid unnecessary copying
276-
if not has_blobs:
277-
return messages
278-
279-
# Deep copy messages to avoid mutating the original
280-
messages_copy = deepcopy(messages)
281-
282-
# Second pass: redact blob content in the copy
283-
for message in messages_copy:
284-
if not isinstance(message, dict):
285-
continue
286-
287-
content = message.get("content")
288-
if isinstance(content, list):
289-
for item in content:
290-
if isinstance(item, dict):
291-
if item.get("type") == "blob":
292-
item["content"] = BLOB_DATA_SUBSTITUTE
293-
elif _is_image_type_with_blob_content(item):
294-
item["image_url"]["url"] = BLOB_DATA_SUBSTITUTE
295-
296-
return messages_copy
297-
298-
299199
def truncate_messages_by_size(
300200
messages: "List[Dict[str, Any]]",
301201
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -341,8 +241,6 @@ def truncate_and_annotate_messages(
341241
if not messages:
342242
return None
343243

344-
messages = redact_blob_message_parts(messages)
345-
346244
truncated_message = _truncate_single_message_content_if_present(
347245
deepcopy(messages[-1]), max_chars=max_single_message_chars
348246
)
@@ -361,8 +259,6 @@ def truncate_and_annotate_embedding_inputs(
361259
if not messages:
362260
return None
363261

364-
messages = redact_blob_message_parts(messages)
365-
366262
truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
367263
if removed_count > 0:
368264
scope._gen_ai_original_message_count[span.span_id] = len(messages)

0 commit comments

Comments
 (0)