Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"Reply with exactly OK and nothing
else."},{"role":"user","content":"Reply with exactly: OK"}],"model":"gpt-4o-mini","logprobs":true,"seed":1,"stream":true,"temperature":0,"top_logprobs":2}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '229'
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- OpenAI/Python 2.30.0
X-Stainless-Arch:
- arm64
X-Stainless-Async:
- 'false'
X-Stainless-Lang:
- python
X-Stainless-OS:
- MacOS
X-Stainless-Package-Version:
- 2.30.0
X-Stainless-Runtime:
- CPython
X-Stainless-Runtime-Version:
- 3.13.3
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-DS469BIj7I8lPBf6o1Vrh09KdRS1C","object":"chat.completion.chunk","created":1775581309,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_218cd55417","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":{"content":[],"refusal":null},"finish_reason":null}],"obfuscation":"4JaDqpR1wg0mz"}


data: {"id":"chatcmpl-DS469BIj7I8lPBf6o1Vrh09KdRS1C","object":"chat.completion.chunk","created":1775581309,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_218cd55417","choices":[{"index":0,"delta":{"content":"OK"},"logprobs":{"content":[{"token":"OK","logprob":0.0,"bytes":[79,75],"top_logprobs":[{"token":"OK","logprob":0.0,"bytes":[79,75]},{"token":"
OK","logprob":-19.0,"bytes":[32,79,75]}]}],"refusal":null},"finish_reason":null}],"obfuscation":""}


data: {"id":"chatcmpl-DS469BIj7I8lPBf6o1Vrh09KdRS1C","object":"chat.completion.chunk","created":1775581309,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_218cd55417","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"Ox"}


data: [DONE]


'
headers:
CF-Cache-Status:
- DYNAMIC
CF-Ray:
- 9e8a98af68f1abb5-YYZ
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Tue, 07 Apr 2026 17:01:49 GMT
Server:
- cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- braintrust-data
openai-processing-ms:
- '257'
openai-project:
- proj_vsCSXafhhByzWOThMrJcZiw9
openai-version:
- '2020-10-01'
set-cookie:
- __cf_bm=DQLjVZZ0JcnAvssCENysUlUUTA0hqjJMaJa7wzYHKE0-1775581309.3517559-1.0.1.1-EK2uxO2w8TTFRLvMzUrQQtAGpZR5morImfrMd8X9zUWXT3pumefyADMQdomdOYlUAkhXhNZfV4uO5fL7ZPdGs9rTIf.aNPpodlWzVW2uYc_NGpz4247vvIUafMMkggqg;
HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 07 Apr 2026
17:31:49 GMT
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999982'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_5ae9848773d14dc8801267da556e753f
status:
code: 200
message: OK
version: 1
45 changes: 45 additions & 0 deletions py/src/braintrust/integrations/openai/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,51 @@ def test_openai_chat_stream_helper_sync(memory_logger):
assert "24" in str(span["output"]) or "twenty-four" in str(span["output"]).lower()


@pytest.mark.vcr
def test_openai_chat_streaming_sync_preserves_logprobs(memory_logger):
    """Streaming chat completions must surface per-token logprobs both in the
    live chunks and in the aggregated span output logged by the wrapper."""
    assert not memory_logger.pop()

    client = wrap_openai(openai.OpenAI())
    # NOTE: this request is replayed from a VCR cassette — the kwargs below
    # must stay exactly as recorded for the cassette to match.
    stream = client.chat.completions.create(
        model=TEST_MODEL,
        messages=[
            {"role": "system", "content": "Reply with exactly OK and nothing else."},
            {"role": "user", "content": "Reply with exactly: OK"},
        ],
        stream=True,
        temperature=0,
        seed=1,
        logprobs=True,
        top_logprobs=2,
    )

    # Accumulate the streamed text and every logprob token seen across chunks.
    pieces: list[str] = []
    seen_tokens: list[str] = []
    for chunk in stream:
        if not chunk.choices:
            continue

        first = chunk.choices[0]
        delta_text = first.delta.content
        if delta_text:
            pieces.append(delta_text)
        chunk_logprobs = first.logprobs
        if chunk_logprobs and chunk_logprobs.content:
            seen_tokens += [entry.token for entry in chunk_logprobs.content]

    assert "OK" in "".join(pieces)
    assert "OK" in "".join(seen_tokens)

    # Exactly one span should have been logged, and it must carry the
    # aggregated logprobs rather than dropping them.
    spans = memory_logger.pop()
    assert len(spans) == 1
    span = spans[0]
    assert span["output"]

    logged_choice = span["output"][0]
    assert logged_choice["finish_reason"] == "stop"
    assert logged_choice["logprobs"] is not None
    assert logged_choice["logprobs"]["content"]
    logged_tokens = "".join(entry["token"] for entry in logged_choice["logprobs"]["content"])
    assert "OK" in logged_tokens


@pytest.mark.vcr
def test_openai_chat_with_system_prompt(memory_logger):
assert not memory_logger.pop()
Expand Down
39 changes: 34 additions & 5 deletions py/src/braintrust/integrations/openai/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,9 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di
content = None
tool_calls: list[Any] | None = None
finish_reason = None
logprobs_content: list[Any] | None = None
logprobs_refusal: list[Any] | None = None
saw_logprobs = False
metrics: dict[str, float] = {}
for result in all_results:
usage = result.get("usage")
Expand All @@ -473,16 +476,35 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di
choices = result["choices"]
if not choices:
continue
delta = choices[0]["delta"]

choice = choices[0]
fr = choice.get("finish_reason")
if fr is not None:
finish_reason = fr

choice_logprobs = choice.get("logprobs")
if choice_logprobs is not None:
saw_logprobs = True

chunk_content_logprobs = choice_logprobs.get("content")
if chunk_content_logprobs is not None:
if logprobs_content is None:
logprobs_content = []
logprobs_content.extend(chunk_content_logprobs)

chunk_refusal_logprobs = choice_logprobs.get("refusal")
if chunk_refusal_logprobs is not None:
if logprobs_refusal is None:
logprobs_refusal = []
logprobs_refusal.extend(chunk_refusal_logprobs)

delta = choice.get("delta")
if not delta:
continue

if role is None and delta.get("role") is not None:
role = delta.get("role")

if delta.get("finish_reason") is not None:
finish_reason = delta.get("finish_reason")

if delta.get("content") is not None:
content = (content or "") + delta.get("content")

Expand Down Expand Up @@ -524,7 +546,14 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di
"content": content,
"tool_calls": tool_calls,
},
"logprobs": None,
"logprobs": (
{
"content": logprobs_content,
"refusal": logprobs_refusal,
}
if saw_logprobs
else None
),
"finish_reason": finish_reason,
}
],
Expand Down
Loading