Skip to content

Commit 3971a12

Browse files
sentrivanaclaudealexander-alderman-webb
authored
fix(stdlib): Instrument response body read for chunked HTTP responses (#6202)
The HTTP client span in the stdlib integration was finishing in `getresponse()`, which only waits for response headers, not the actual response. For chunked or large responses, the actual data transfer happens during `read()`, leaving that time uninstrumented. Defer span completion to `HTTPResponse.read()` for responses with a body (chunked or Content-Length > 0), with `HTTPResponse.close()` as a safety net for responses that are never read. ⚠️ Note that this means we might report some requests to be longer than they actually were, since in some cases we only close them once the GC gets to them (and `close()` is called). In a sense we're essentially flipping the current situation (where we report requests to be much shorter than they are, since they don't include the response part) -- but the current situation was incorrect for all spans, while this `close()` fallback should hopefully only kick in for edge cases. Responses with no body (Content-Length: 0, HEAD, 204, 304) still finish the span immediately in `getresponse()`. * Closes #2277 * Closes https://linear.app/getsentry/issue/PY-159/sentry-does-not-fully-instrument-requests-library-requests --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Alex Alderman Webb <alexander.webb@sentry.io>
1 parent fb587da commit 3971a12

2 files changed

Lines changed: 138 additions & 18 deletions

File tree

sentry_sdk/integrations/stdlib.py

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import subprocess
33
import sys
44
import platform
5-
from http.client import HTTPConnection
5+
from http.client import HTTPConnection, HTTPResponse
66

77
import sentry_sdk
88
from sentry_sdk.consts import OP, SPANDATA
@@ -66,9 +66,22 @@ def add_python_runtime_context(
6666
return event
6767

6868

69+
def _complete_span(span: "Union[Span, StreamedSpan]") -> None:
70+
if isinstance(span, StreamedSpan):
71+
with capture_internal_exceptions():
72+
add_http_request_source(span)
73+
span.end()
74+
else:
75+
span.finish()
76+
with capture_internal_exceptions():
77+
add_http_request_source(span)
78+
79+
6980
def _install_httplib() -> None:
7081
real_putrequest = HTTPConnection.putrequest
7182
real_getresponse = HTTPConnection.getresponse
83+
real_read = HTTPResponse.read
84+
real_close = HTTPResponse.close
7285

7386
def putrequest(
7487
self: "HTTPConnection", method: str, url: str, *args: "Any", **kwargs: "Any"
@@ -172,29 +185,57 @@ def getresponse(self: "HTTPConnection", *args: "Any", **kwargs: "Any") -> "Any":
172185

173186
try:
174187
rv = real_getresponse(self, *args, **kwargs)
188+
except BaseException:
189+
_complete_span(span)
190+
raise
191+
192+
if isinstance(span, StreamedSpan):
193+
status_code = int(rv.status)
194+
span.status = "error" if status_code >= 400 else "ok"
195+
span.set_attribute("http.response.status_code", status_code)
196+
else:
197+
span.set_http_status(int(rv.status))
198+
span.set_data("reason", rv.reason)
199+
200+
# getresponse doesn't include actually reading the response body. This
201+
# is done in read(). So if the metadata/headers suggest there's a body to
202+
# read, don't finish the span just yet, but save it for ending it later.
203+
has_body = rv.chunked or (rv.length is not None and rv.length > 0)
204+
if has_body:
205+
rv._sentrysdk_span = span # type: ignore[attr-defined]
206+
else:
207+
_complete_span(span)
175208

176-
if isinstance(span, StreamedSpan):
177-
status_code = int(rv.status)
178-
span.status = "error" if status_code >= 400 else "ok"
179-
span.set_attribute("http.response.status_code", status_code)
180-
else:
181-
span.set_http_status(int(rv.status))
182-
span.set_data("reason", rv.reason)
183-
finally:
184-
if isinstance(span, StreamedSpan):
185-
with capture_internal_exceptions():
186-
add_http_request_source(span)
187-
span.end()
188-
else:
189-
span.finish()
209+
return rv
190210

191-
with capture_internal_exceptions():
192-
add_http_request_source(span)
211+
def read(self: "HTTPResponse", *args: "Any", **kwargs: "Any") -> "Any":
212+
try:
213+
return real_read(self, *args, **kwargs)
214+
finally:
215+
span = getattr(self, "_sentrysdk_span", None)
216+
# read() might be called multiple times to consume a single body,
217+
# so we can't just end the span when read() is done. Instead,
218+
# try to figure out whether the response body has been fully read.
219+
if span and (self.fp is None or self.closed):
220+
self._sentrysdk_span = None # type: ignore[attr-defined]
221+
_complete_span(span)
222+
223+
def close(self: "HTTPResponse") -> None:
224+
# We patch close() as a best effort fallback in case the span is not
225+
# ended yet in getresponse() or read().
193226

194-
return rv
227+
try:
228+
real_close(self)
229+
finally:
230+
span = getattr(self, "_sentrysdk_span", None)
231+
if span is not None:
232+
self._sentrysdk_span = None # type: ignore[attr-defined]
233+
_complete_span(span)
195234

196235
HTTPConnection.putrequest = putrequest # type: ignore[method-assign]
197236
HTTPConnection.getresponse = getresponse # type: ignore[method-assign]
237+
HTTPResponse.read = read # type: ignore[method-assign]
238+
HTTPResponse.close = close # type: ignore[assignment,method-assign]
198239

199240

200241
def _init_argument(

tests/integrations/stdlib/test_httplib.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import socket
33
import datetime
4+
import time
45
from http.client import HTTPConnection, HTTPSConnection
56
from http.server import BaseHTTPRequestHandler, HTTPServer
67
from socket import SocketIO
@@ -44,6 +45,37 @@ def create_mock_proxy_server():
4445

4546
PROXY_PORT = create_mock_proxy_server()
4647

48+
CHUNK_DELAY = 0.1
49+
NUM_CHUNKS = 3
50+
51+
52+
class ChunkedResponseHandler(BaseHTTPRequestHandler):
53+
def do_GET(self):
54+
self.send_response(200)
55+
self.send_header("Transfer-Encoding", "chunked")
56+
self.end_headers()
57+
for _ in range(NUM_CHUNKS):
58+
chunk = b"x" * 100
59+
self.wfile.write(f"{len(chunk):x}\r\n".encode() + chunk + b"\r\n")
60+
self.wfile.flush()
61+
time.sleep(CHUNK_DELAY)
62+
self.wfile.write(b"0\r\n\r\n")
63+
64+
def log_message(self, *args):
65+
pass
66+
67+
68+
def create_chunked_server():
69+
port = get_free_port()
70+
server = HTTPServer(("localhost", port), ChunkedResponseHandler)
71+
thread = Thread(target=server.serve_forever)
72+
thread.daemon = True
73+
thread.start()
74+
return port
75+
76+
77+
CHUNKED_PORT = create_chunked_server()
78+
4779

4880
def test_crumb_capture(sentry_init, capture_events):
4981
sentry_init(integrations=[StdlibIntegration()])
@@ -1161,3 +1193,50 @@ def test_proxy_http_tunnel(
11611193
assert span["data"][SPANDATA.HTTP_METHOD] == "GET"
11621194
assert span["data"][SPANDATA.NETWORK_PEER_ADDRESS] == "localhost"
11631195
assert span["data"][SPANDATA.NETWORK_PEER_PORT] == PROXY_PORT
1196+
1197+
1198+
@pytest.mark.parametrize("span_streaming", [True, False])
1199+
def test_chunked_response_span_covers_body_read(
1200+
sentry_init,
1201+
capture_events,
1202+
capture_items,
1203+
span_streaming,
1204+
):
1205+
sentry_init(
1206+
traces_sample_rate=1.0,
1207+
_experiments={"trace_lifecycle": "stream" if span_streaming else "static"},
1208+
)
1209+
1210+
min_expected_duration = CHUNK_DELAY * NUM_CHUNKS
1211+
1212+
if span_streaming:
1213+
items = capture_items("span")
1214+
1215+
with sentry_sdk.traces.start_span(name="custom parent"):
1216+
conn = HTTPConnection("localhost", CHUNKED_PORT)
1217+
conn.request("GET", "/chunked")
1218+
response = conn.getresponse()
1219+
response.read()
1220+
1221+
sentry_sdk.flush()
1222+
http_span, parent_span = [item.payload for item in items]
1223+
1224+
duration = http_span["end_timestamp"] - http_span["start_timestamp"]
1225+
assert duration >= min_expected_duration
1226+
else:
1227+
events = capture_events()
1228+
1229+
with start_transaction(name="test_chunked"):
1230+
conn = HTTPConnection("localhost", CHUNKED_PORT)
1231+
conn.request("GET", "/chunked")
1232+
response = conn.getresponse()
1233+
response.read()
1234+
1235+
(event,) = events
1236+
(span,) = event["spans"]
1237+
1238+
fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
1239+
start = datetime.datetime.strptime(span["start_timestamp"], fmt)
1240+
end = datetime.datetime.strptime(span["timestamp"], fmt)
1241+
duration = (end - start).total_seconds()
1242+
assert duration >= min_expected_duration

0 commit comments

Comments
 (0)