33from copy import deepcopy
44from typing import TYPE_CHECKING
55
6- from sentry_sdk ._types import BLOB_DATA_SUBSTITUTE
7- from sentry_sdk .ai .consts import DATA_URL_BASE64_REGEX
86
97if TYPE_CHECKING :
108 from typing import Any , Callable , Dict , List , Optional , Tuple
@@ -198,104 +196,6 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
198196 return 0
199197
200198
201- def _is_image_type_with_blob_content (item : "Dict[str, Any]" ) -> bool :
202- """
203- Some content blocks contain an image_url property with base64 content as its value.
204- This is used to identify those while not leading to unnecessary copying of data when the image URL does not contain base64 content.
205- """
206- if item .get ("type" ) != "image_url" :
207- return False
208-
209- image_url = item .get ("image_url" , {}).get ("url" , "" )
210- data_url_match = DATA_URL_BASE64_REGEX .match (image_url )
211-
212- return bool (data_url_match )
213-
214-
def redact_blob_message_parts(
    messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
    """
    Replace blob payloads inside message content with ``BLOB_DATA_SUBSTITUTE``.

    Only messages that are dicts with a list under ``"content"`` are inspected,
    and within that list only dict items. Two kinds of item are redacted:

    * items whose ``"type"`` is ``"blob"``: their ``"content"`` value is replaced;
    * ``image_url`` items recognised by ``_is_image_type_with_blob_content``:
      their ``["image_url"]["url"]`` value is replaced.

    If no such item exists, the very same ``messages`` object is returned and
    nothing is copied. Otherwise the whole list is deep-copied once and the
    redaction is applied to the copy, so the caller's data is never mutated.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    keeps the text item untouched, while the blob item comes back as:
    {
        "type": "blob",
        "modality": "image",
        "mime_type": "image/jpeg",
        "content": BLOB_DATA_SUBSTITUTE
    }

    :param messages: Messages to scan.
    :return: ``messages`` itself when nothing needs redacting, otherwise a
        redacted deep copy.
    """

    def _dict_parts(candidates: "Any") -> "Any":
        # Lazily yield every dict item found in a list-valued "content" of a
        # dict message; anything else is skipped.
        for entry in candidates:
            if not isinstance(entry, dict):
                continue
            body = entry.get("content")
            if not isinstance(body, list):
                continue
            for part in body:
                if isinstance(part, dict):
                    yield part

    # Detection pass: stops at the first blob, touches nothing.
    contains_blob = any(
        part.get("type") == "blob" or _is_image_type_with_blob_content(part)
        for part in _dict_parts(messages)
    )
    if not contains_blob:
        # Nothing to redact: hand back the original and skip the copy.
        return messages

    # Redaction pass: operate on a deep copy so the input stays intact.
    redacted = deepcopy(messages)
    for part in _dict_parts(redacted):
        if part.get("type") == "blob":
            part["content"] = BLOB_DATA_SUBSTITUTE
        elif _is_image_type_with_blob_content(part):
            part["image_url"]["url"] = BLOB_DATA_SUBSTITUTE

    return redacted
297-
298-
299199def truncate_messages_by_size (
300200 messages : "List[Dict[str, Any]]" ,
301201 max_bytes : int = MAX_GEN_AI_MESSAGE_BYTES ,
@@ -341,8 +241,6 @@ def truncate_and_annotate_messages(
341241 if not messages :
342242 return None
343243
344- messages = redact_blob_message_parts (messages )
345-
346244 truncated_message = _truncate_single_message_content_if_present (
347245 deepcopy (messages [- 1 ]), max_chars = max_single_message_chars
348246 )
@@ -361,8 +259,6 @@ def truncate_and_annotate_embedding_inputs(
361259 if not messages :
362260 return None
363261
364- messages = redact_blob_message_parts (messages )
365-
366262 truncated_messages , removed_count = truncate_messages_by_size (messages , max_bytes )
367263 if removed_count > 0 :
368264 scope ._gen_ai_original_message_count [span .span_id ] = len (messages )
0 commit comments