Skip to content
9 changes: 9 additions & 0 deletions sentry_sdk/integrations/_wsgi_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,15 @@
)


def _serialize_request_body_data(data: "Any") -> str:
def _default(value: "Any") -> "Any":
if isinstance(value, AnnotatedValue):
return value.value
return str(value)

return json.dumps(data, default=_default)


Check warning on line 220 in sentry_sdk/integrations/_wsgi_common.py

View check run for this annotation

@sentry/warden / warden: security-review

Request body with sensitive fields bypasses EventScrubber when written to streamed span attributes

When span streaming is enabled, `_serialize_request_body_data` serializes the full parsed request body (including form fields like `password`, `token`, `api_key`) into a span attribute that is sent via `_capture_telemetry` — a path that never calls `EventScrubber.scrub_event`, so denylist-matched fields are transmitted to Sentry unredacted. Apply `event_scrubber.scrub_dict` to the parsed body dict before serializing it, or scrub the deserialized attribute after the fact.
Comment thread
ericapisani marked this conversation as resolved.
def _filter_headers(
headers: "Mapping[str, str]",
use_annotated_value: bool = True,
Expand Down
79 changes: 76 additions & 3 deletions sentry_sdk/integrations/flask.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@
from sentry_sdk.integrations._wsgi_common import (
DEFAULT_HTTP_METHODS_TO_CAPTURE,
RequestExtractor,
_serialize_request_body_data,
request_body_within_bounds,
)
from sentry_sdk.integrations.wsgi import SentryWsgiMiddleware
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.traces import StreamedSpan, _get_current_streamed_span
from sentry_sdk.tracing import SOURCE_FOR_STYLE
from sentry_sdk.tracing_utils import has_span_streaming_enabled
from sentry_sdk.utils import (
AnnotatedValue,
capture_internal_exceptions,
ensure_integration_enabled,
event_from_exception,
Expand All @@ -36,9 +41,11 @@
from flask.signals import (
before_render_template,
got_request_exception,
request_finished,
request_started,
)
from markupsafe import Markup
from werkzeug.exceptions import ClientDisconnected
except ImportError:
raise DidNotEnable("Flask is not installed")

Expand Down Expand Up @@ -88,17 +95,17 @@

before_render_template.connect(_add_sentry_trace)
request_started.connect(_request_started)
Comment thread
ericapisani marked this conversation as resolved.
request_finished.connect(_request_finished)
got_request_exception.connect(_capture_exception)

old_app = Flask.__call__

def sentry_patched_wsgi_app(
self: "Any", environ: "Dict[str, str]", start_response: "Callable[..., Any]"
) -> "_ScopedResponse":
if sentry_sdk.get_client().get_integration(FlaskIntegration) is None:
return old_app(self, environ, start_response)

integration = sentry_sdk.get_client().get_integration(FlaskIntegration)
if integration is None:
return old_app(self, environ, start_response)

middleware = SentryWsgiMiddleware(
lambda *a, **kw: old_app(self, *a, **kw),
Expand Down Expand Up @@ -160,6 +167,72 @@
scope.add_event_processor(evt_processor)


def _request_finished(sender: "Flask", response: "Any", **kwargs: "Any") -> None:
    """Receiver for Flask's ``request_finished`` signal.

    Does nothing unless the Flask integration is enabled on the active client
    and span streaming is turned on in the client options. In that case the
    current request is passed, together with the client, to
    ``_set_request_body_data_on_streaming_segment``.

    :param sender: The Flask application emitting the signal (not used here).
    :param response: The response delivered with the signal (not used here).
    :param kwargs: Extra signal arguments (not used here).
    """
    # Look the client up once and reuse it for both checks.
    client = sentry_sdk.get_client()
    if client.get_integration(FlaskIntegration) is None:
        return

    if has_span_streaming_enabled(client.options):
        # NOTE(review): flask_request is presumably Flask's request proxy, so
        # _get_current_object() hands the helper the object behind it - confirm.
        request = flask_request._get_current_object()
        _set_request_body_data_on_streaming_segment(request, client)
Comment thread
ericapisani marked this conversation as resolved.


# Records the Flask request body on the current streamed span's segment as the
# "http.request.body.data" attribute, serialized with
# _serialize_request_body_data.
# NOTE(review): indentation was lost in this capture and review-page text is
# interleaved with the code, so the nesting of the branches below has to be
# inferred from statement order - confirm against the real file.
def _set_request_body_data_on_streaming_segment(
request: "Request", client: "sentry_sdk.client.BaseClient"
) -> None:
current_span = _get_current_streamed_span()
# Exact type comparison: anything that is not precisely a StreamedSpan is ignored.
if type(current_span) is not StreamedSpan:
return

with capture_internal_exceptions():
# A missing (None) or zero content length is normalized to 0 here.
content_length = int(request.content_length or 0)

# Proceeding without a content length means that we may be consuming the request
# without respecting the bounds specified by the user via `max_request_body_size`
# option in the SDK.
if not content_length:
return

# Bodies outside the configured bounds are represented by a placeholder
# AnnotatedValue instead of their content.
if not request_body_within_bounds(client, content_length):
data = AnnotatedValue.substituted_because_over_size_limit()
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This general logic mostly follows what happens with the request extractor, but Instead of removing the string entirely, it substitutes it with a value to explain why a value was removed, as opposed to providing only an empty string.

else:
# getattr with a default: a request without `_cached_data` yields None.
raw_data = getattr(request, "_cached_data", None)
parsed_body = None
# NOTE(review): a "form" key in request.__dict__ presumably means the form
# was already parsed and cached on the request - confirm.
if "form" in request.__dict__:
extractor = FlaskRequestExtractor(request)
parsed_body = extractor.parsed_body()
elif raw_data is not None:
extractor = FlaskRequestExtractor(request)
if extractor.is_json():
parsed_body = extractor.json()
else:
# The route never read the body via Werkzeug, but it
# may have consumed wsgi.input directly. get_data()
# raises ClientDisconnected if the stream is exhausted.
try:
raw_data = request.get_data()
except ClientDisconnected:
raw_data = None

if raw_data:
extractor = FlaskRequestExtractor(request)
if extractor.is_json():
parsed_body = extractor.json()

# Prefer the parsed body; otherwise use a raw-data placeholder when raw bytes
# exist; otherwise record nothing.
if parsed_body is not None:
data = parsed_body
elif raw_data:
data = AnnotatedValue.substituted_because_raw_data()
else:
return

current_span._segment.set_attribute(

Check warning on line 230 in sentry_sdk/integrations/flask.py

View check run for this annotation

@sentry/warden / warden: security-review

[W3M-9D6] Request body with sensitive fields bypasses EventScrubber when written to streamed span attributes (additional location)

When span streaming is enabled, `_serialize_request_body_data` serializes the full parsed request body (including form fields like `password`, `token`, `api_key`) into a span attribute that is sent via `_capture_telemetry` — a path that never calls `EventScrubber.scrub_event`, so denylist-matched fields are transmitted to Sentry unredacted. Apply `event_scrubber.scrub_dict` to the parsed body dict before serializing it, or scrub the deserialized attribute after the fact.
"http.request.body.data",
_serialize_request_body_data(data),
)


class FlaskRequestExtractor(RequestExtractor):
def env(self) -> "Dict[str, str]":
return self.request.environ
Expand Down
12 changes: 1 addition & 11 deletions sentry_sdk/integrations/starlette.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import functools
import json
import sys
import warnings
from collections.abc import Set
Expand All @@ -18,6 +17,7 @@
DEFAULT_HTTP_METHODS_TO_CAPTURE,
HttpCodeRangeContainer,
_is_json_content_type,
_serialize_request_body_data,

Check warning on line 20 in sentry_sdk/integrations/starlette.py

View check run for this annotation

@sentry/warden / warden: security-review

[W3M-9D6] Request body with sensitive fields bypasses EventScrubber when written to streamed span attributes (additional location)

When span streaming is enabled, `_serialize_request_body_data` serializes the full parsed request body (including form fields like `password`, `token`, `api_key`) into a span attribute that is sent via `_capture_telemetry` — a path that never calls `EventScrubber.scrub_event`, so denylist-matched fields are transmitted to Sentry unredacted. Apply `event_scrubber.scrub_dict` to the parsed body dict before serializing it, or scrub the deserialized attribute after the fact.
request_body_within_bounds,
)
from sentry_sdk.integrations.asgi import SentryAsgiMiddleware
Expand Down Expand Up @@ -241,16 +241,6 @@
return middleware_class


def _serialize_request_body_data(data: "Any") -> str:
# data may be a JSON-serializable value, an AnnotatedValue, or a dict with AnnotatedValue values
def _default(value: "Any") -> "Any":
if isinstance(value, AnnotatedValue):
return value.value
return str(value)

return json.dumps(data, default=_default)


def _set_request_body_data_on_streaming_segment(
info: "Optional[Dict[str, Any]]",
) -> None:
Expand Down
Loading
Loading