codespaces-models/samples/python/openai/streaming.py at main · github/codespaces-models · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""For a better user experience, you will want to stream the response of the model
so that the first token shows up early and you avoid waiting for long responses."""

import os
from openai import OpenAI

# Endpoint of the GitHub Models service and the token used to authenticate.
# The token must be present in the environment; a missing GITHUB_TOKEN
# raises KeyError here.
endpoint = "https://models.github.ai/inference"
token = os.environ["GITHUB_TOKEN"]

# One of the Azure OpenAI models offered through the GitHub Models service
model_name = "openai/gpt-4o-mini"

# Build the client, pointing it at the endpoint above
client = OpenAI(api_key=token, base_url=endpoint)

# Send the chat completion request. stream=True asks for the reply to be
# delivered incrementally; the result is iterated further down.
response = client.chat.completions.create(
    model=model_name,
    stream=True,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "Give me 5 good reasons why I should exercise every day.",
        },
    ],
)

# Print the streamed response piece by piece as it arrives
for update in response:
    # Some updates carry no choices; there is nothing to print for those.
    if not update.choices:
        continue
    content = update.choices[0].delta.content
    if content:
        # flush=True is required for real streaming: stdout is line-buffered
        # on a terminal (block-buffered otherwise), so without it fragments
        # that lack a newline would stay in the buffer instead of showing up
        # immediately.
        print(content, end="", flush=True)

# Terminate the output with a newline once the stream is exhausted
print()