Skip to content

Commit d1c1927

Browse files
committed
fix(openai): preserve logprobs for streaming chat spans
Accumulate per-chunk logprob content and refusal entries when postprocessing chat completion streams so traced output matches non-streaming calls. Also read finish_reason from the streamed choice and add a VCR regression test for the wrapped sync path. Closes #180
1 parent 43faa35 commit d1c1927

File tree

3 files changed

+189
-5
lines changed

3 files changed

+189
-5
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
interactions:
2+
- request:
3+
body: '{"messages":[{"role":"system","content":"Reply with exactly OK and nothing
4+
else."},{"role":"user","content":"Reply with exactly: OK"}],"model":"gpt-4o-mini","logprobs":true,"seed":1,"stream":true,"temperature":0,"top_logprobs":2}'
5+
headers:
6+
Accept:
7+
- application/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '229'
14+
Content-Type:
15+
- application/json
16+
Host:
17+
- api.openai.com
18+
User-Agent:
19+
- OpenAI/Python 2.30.0
20+
X-Stainless-Arch:
21+
- arm64
22+
X-Stainless-Async:
23+
- 'false'
24+
X-Stainless-Lang:
25+
- python
26+
X-Stainless-OS:
27+
- MacOS
28+
X-Stainless-Package-Version:
29+
- 2.30.0
30+
X-Stainless-Runtime:
31+
- CPython
32+
X-Stainless-Runtime-Version:
33+
- 3.13.3
34+
x-stainless-read-timeout:
35+
- '600'
36+
x-stainless-retry-count:
37+
- '0'
38+
method: POST
39+
uri: https://api.openai.com/v1/chat/completions
40+
response:
41+
body:
42+
string: 'data: {"id":"chatcmpl-DS469BIj7I8lPBf6o1Vrh09KdRS1C","object":"chat.completion.chunk","created":1775581309,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_218cd55417","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":{"content":[],"refusal":null},"finish_reason":null}],"obfuscation":"4JaDqpR1wg0mz"}
43+
44+
45+
data: {"id":"chatcmpl-DS469BIj7I8lPBf6o1Vrh09KdRS1C","object":"chat.completion.chunk","created":1775581309,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_218cd55417","choices":[{"index":0,"delta":{"content":"OK"},"logprobs":{"content":[{"token":"OK","logprob":0.0,"bytes":[79,75],"top_logprobs":[{"token":"OK","logprob":0.0,"bytes":[79,75]},{"token":"
46+
OK","logprob":-19.0,"bytes":[32,79,75]}]}],"refusal":null},"finish_reason":null}],"obfuscation":""}
47+
48+
49+
data: {"id":"chatcmpl-DS469BIj7I8lPBf6o1Vrh09KdRS1C","object":"chat.completion.chunk","created":1775581309,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_218cd55417","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"Ox"}
50+
51+
52+
data: [DONE]
53+
54+
55+
'
56+
headers:
57+
CF-Cache-Status:
58+
- DYNAMIC
59+
CF-Ray:
60+
- 9e8a98af68f1abb5-YYZ
61+
Connection:
62+
- keep-alive
63+
Content-Type:
64+
- text/event-stream; charset=utf-8
65+
Date:
66+
- Tue, 07 Apr 2026 17:01:49 GMT
67+
Server:
68+
- cloudflare
69+
Strict-Transport-Security:
70+
- max-age=31536000; includeSubDomains; preload
71+
Transfer-Encoding:
72+
- chunked
73+
X-Content-Type-Options:
74+
- nosniff
75+
access-control-expose-headers:
76+
- X-Request-ID
77+
alt-svc:
78+
- h3=":443"; ma=86400
79+
openai-organization:
80+
- braintrust-data
81+
openai-processing-ms:
82+
- '257'
83+
openai-project:
84+
- proj_vsCSXafhhByzWOThMrJcZiw9
85+
openai-version:
86+
- '2020-10-01'
87+
set-cookie:
88+
- __cf_bm=DQLjVZZ0JcnAvssCENysUlUUTA0hqjJMaJa7wzYHKE0-1775581309.3517559-1.0.1.1-EK2uxO2w8TTFRLvMzUrQQtAGpZR5morImfrMd8X9zUWXT3pumefyADMQdomdOYlUAkhXhNZfV4uO5fL7ZPdGs9rTIf.aNPpodlWzVW2uYc_NGpz4247vvIUafMMkggqg;
89+
HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 07 Apr 2026
90+
17:31:49 GMT
91+
x-openai-proxy-wasm:
92+
- v0.1
93+
x-ratelimit-limit-requests:
94+
- '30000'
95+
x-ratelimit-limit-tokens:
96+
- '150000000'
97+
x-ratelimit-remaining-requests:
98+
- '29999'
99+
x-ratelimit-remaining-tokens:
100+
- '149999982'
101+
x-ratelimit-reset-requests:
102+
- 2ms
103+
x-ratelimit-reset-tokens:
104+
- 0s
105+
x-request-id:
106+
- req_5ae9848773d14dc8801267da556e753f
107+
status:
108+
code: 200
109+
message: OK
110+
version: 1

py/src/braintrust/integrations/openai/test_openai.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,51 @@ def test_openai_chat_streaming_sync(memory_logger):
333333
assert "24" in str(span["output"]) or "twenty-four" in str(span["output"]).lower()
334334

335335

336+
@pytest.mark.vcr
def test_openai_chat_streaming_sync_preserves_logprobs(memory_logger):
    """Regression test (closes #180): streaming chat completions must keep logprobs.

    Streams a deterministic one-token reply (temperature=0, fixed seed, recorded
    via VCR) with ``logprobs=True`` and checks that the traced span's output
    carries the accumulated per-chunk logprob entries and the finish_reason,
    matching what a non-streaming call would produce.
    """
    # Sanity: no spans left over from a previous test.
    assert not memory_logger.pop()

    client = wrap_openai(openai.OpenAI())
    stream = client.chat.completions.create(
        model=TEST_MODEL,
        messages=[
            {"role": "system", "content": "Reply with exactly OK and nothing else."},
            {"role": "user", "content": "Reply with exactly: OK"},
        ],
        stream=True,
        temperature=0,
        seed=1,
        logprobs=True,
        top_logprobs=2,
    )

    # Consume the stream ourselves, collecting what the raw chunks report so we
    # can later compare it against what the wrapper logged.
    chunk_logprob_tokens = []
    content = ""
    for chunk in stream:
        # Some chunks (e.g. usage-only) may carry no choices.
        if not chunk.choices:
            continue

        choice = chunk.choices[0]
        if choice.delta.content:
            content += choice.delta.content
        if choice.logprobs and choice.logprobs.content:
            chunk_logprob_tokens.extend(entry.token for entry in choice.logprobs.content)

    # The recorded cassette replies "OK" and includes logprobs for that token.
    assert "OK" in content
    assert "OK" in "".join(chunk_logprob_tokens)

    # Exactly one span should have been logged for the wrapped call.
    spans = memory_logger.pop()
    assert len(spans) == 1
    span = spans[0]
    assert span["output"]

    # The traced choice must preserve finish_reason and the accumulated
    # logprobs content (previously dropped for streaming responses).
    output_choice = span["output"][0]
    assert output_choice["finish_reason"] == "stop"
    assert output_choice["logprobs"] is not None
    assert output_choice["logprobs"]["content"]
    assert "OK" in "".join(entry["token"] for entry in output_choice["logprobs"]["content"])
379+
380+
336381
@pytest.mark.vcr
337382
def test_openai_chat_with_system_prompt(memory_logger):
338383
assert not memory_logger.pop()

py/src/braintrust/integrations/openai/tracing.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,9 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di
464464
content = None
465465
tool_calls: list[Any] | None = None
466466
finish_reason = None
467+
logprobs_content: list[Any] | None = None
468+
logprobs_refusal: list[Any] | None = None
469+
saw_logprobs = False
467470
metrics: dict[str, float] = {}
468471
for result in all_results:
469472
usage = result.get("usage")
@@ -473,16 +476,35 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di
473476
choices = result["choices"]
474477
if not choices:
475478
continue
476-
delta = choices[0]["delta"]
479+
480+
choice = choices[0]
481+
fr = choice.get("finish_reason")
482+
if fr is not None:
483+
finish_reason = fr
484+
485+
choice_logprobs = choice.get("logprobs")
486+
if choice_logprobs is not None:
487+
saw_logprobs = True
488+
489+
chunk_content_logprobs = choice_logprobs.get("content")
490+
if chunk_content_logprobs is not None:
491+
if logprobs_content is None:
492+
logprobs_content = []
493+
logprobs_content.extend(chunk_content_logprobs)
494+
495+
chunk_refusal_logprobs = choice_logprobs.get("refusal")
496+
if chunk_refusal_logprobs is not None:
497+
if logprobs_refusal is None:
498+
logprobs_refusal = []
499+
logprobs_refusal.extend(chunk_refusal_logprobs)
500+
501+
delta = choice.get("delta")
477502
if not delta:
478503
continue
479504

480505
if role is None and delta.get("role") is not None:
481506
role = delta.get("role")
482507

483-
if delta.get("finish_reason") is not None:
484-
finish_reason = delta.get("finish_reason")
485-
486508
if delta.get("content") is not None:
487509
content = (content or "") + delta.get("content")
488510

@@ -524,7 +546,14 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di
524546
"content": content,
525547
"tool_calls": tool_calls,
526548
},
527-
"logprobs": None,
549+
"logprobs": (
550+
{
551+
"content": logprobs_content,
552+
"refusal": logprobs_refusal,
553+
}
554+
if saw_logprobs
555+
else None
556+
),
528557
"finish_reason": finish_reason,
529558
}
530559
],

0 commit comments

Comments
 (0)