Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions src/regolo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,8 @@ def get_available_models(api_key: str, model_type: str):
@click.option('--api-key', required=False, help='The API key used to chat with Regolo.')
@click.option('--disable-newlines', required=False, is_flag=True, default=False,
help='Disable new lines, they will be replaced with space character')
def chat(no_hide: bool, api_key: str, disable_newlines: bool):
@click.option('--max-tokens', default=2048, help='Max tokens per response (default: 2048)')
def chat(no_hide: bool, api_key: str, disable_newlines: bool, max_tokens: int):
if not api_key:
api_key = click.prompt("Insert your regolo API key", hide_input=not no_hide)
available_models: list[dict] = regolo.RegoloClient.get_available_models(api_key, model_info=True)
Expand Down Expand Up @@ -767,17 +768,29 @@ def chat(no_hide: bool, api_key: str, disable_newlines: bool):
if user_input == "/bye":
exit(0)
# get chat response and save in the client
response = client.run_chat(user_input, stream=True, full_output=False)
response = client.run_chat(user_input, stream=True, full_output=False, max_tokens=max_tokens)

# print output
was_thinking = False
while True:
try:
res = next(response)
if res[0]:
click.echo(res[0] + ":")
if res is None:
continue
is_thinking = res[0] == "thinking"
text = res[1]
if is_thinking and not was_thinking:
click.echo(click.style("Thinking...", dim=True, italic=True))
was_thinking = True
elif not is_thinking and was_thinking and text:
click.echo(click.style("\n─────────────────────\n", dim=True))
was_thinking = False
if disable_newlines:
res[1] = res[1].replace("\n", " ")
click.echo(res[1], nl=False)
text = text.replace("\n", " ")
if is_thinking:
click.echo(click.style(text, dim=True), nl=False)
else:
click.echo(text, nl=False)
except StopIteration:
break

Expand Down
25 changes: 17 additions & 8 deletions src/regolo/client/regolo_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def create_stream_generator(client: httpx.Client,
try:
# Repair and parse the JSON chunk
data_chunk = json.loads(json_repair.repair_json(decoded_line))
except (Exception,):
except Exception:
continue

if full_output:
Expand Down Expand Up @@ -419,22 +419,31 @@ def static_chat_completions(messages: Conversation | List[Dict[str, str]],
:return for stream=False, full_output=True: Tuple, which consists of role and content of response.
"""

# Records whether the most recent text-bearing chunk was reasoning output.
# Updated by resolve() below each time a chunk carries content or reasoning.
_state = {"in_reasoning": False}

def handle_search_text_chat_completions(data: dict) -> Optional[tuple[Role, Content]]:
    """
    Internal method, describes how RegoloClient.create_stream_generator() should handle
    output from chat_completions. Reasoning tokens are rendered separately in the CLI (dim + "Thinking..." header).

    Answer text (``delta["content"]``) is returned with an empty role; reasoning text
    (``delta["reasoning_content"]``) is returned with the pseudo-role ``"thinking"``.

    :param data: One parsed stream chunk: a dict, or a list of such dicts.
    :return: ``(role, text)`` for the chunk; ``("", "")`` when the chunk carries no text;
             ``None`` when ``data`` is neither a dict nor a non-empty list.
    """
    def first_delta(chunk) -> dict:
        """Return the first choice's delta dict, or {} when the chunk has none."""
        if not isinstance(chunk, dict):
            return {}
        choices = chunk.get("choices", [{}])
        # Usage-only / keep-alive chunks may carry an empty "choices" list;
        # indexing [0] blindly would raise IndexError and abort the stream.
        if not isinstance(choices, list) or not choices:
            return {}
        choice = choices[0]
        if not isinstance(choice, dict):
            return {}
        delta = choice.get("delta")
        # "delta": null is possible after JSON repair; treat it as empty.
        return delta if isinstance(delta, dict) else {}

    def resolve(delta: dict) -> tuple[Role, Content]:
        """Map a delta to (role, text), preferring answer content over reasoning."""
        content = delta.get("content")
        if content:
            _state["in_reasoning"] = False
            return "", content
        reasoning = delta.get("reasoning_content")
        if reasoning:
            _state["in_reasoning"] = True
            return "thinking", reasoning
        return "", ""

    if isinstance(data, dict):
        return resolve(first_delta(data))

    if isinstance(data, list):
        # Only one pair can be returned per call, so pick the first element
        # that actually carries text instead of stopping at element 0.
        for element in data:
            resolved = resolve(first_delta(element))
            if resolved[1]:
                return resolved
        # Non-empty list without any text behaves like an empty delta.
        return ("", "") if data else None

    return None

# Use the default API key if not provided
Expand Down