diff --git a/src/regolo/cli.py b/src/regolo/cli.py index df3b30f..25723ec 100644 --- a/src/regolo/cli.py +++ b/src/regolo/cli.py @@ -723,7 +723,8 @@ def get_available_models(api_key: str, model_type: str): @click.option('--api-key', required=False, help='The API key used to chat with Regolo.') @click.option('--disable-newlines', required=False, is_flag=True, default=False, help='Disable new lines, they will be replaced with space character') -def chat(no_hide: bool, api_key: str, disable_newlines: bool): +@click.option('--max-tokens', default=2048, help='Max tokens per response (default: 2048)') +def chat(no_hide: bool, api_key: str, disable_newlines: bool, max_tokens: int): if not api_key: api_key = click.prompt("Insert your regolo API key", hide_input=not no_hide) available_models: list[dict] = regolo.RegoloClient.get_available_models(api_key, model_info=True) @@ -767,17 +768,29 @@ def chat(no_hide: bool, api_key: str, disable_newlines: bool): if user_input == "/bye": exit(0) # get chat response and save in the client - response = client.run_chat(user_input, stream=True, full_output=False) + response = client.run_chat(user_input, stream=True, full_output=False, max_tokens=max_tokens) # print output + was_thinking = False while True: try: res = next(response) - if res[0]: - click.echo(res[0] + ":") + if res is None: + continue + is_thinking = res[0] == "thinking" + text = res[1] + if is_thinking and not was_thinking: + click.echo(click.style("Thinking...", dim=True, italic=True)) + was_thinking = True + elif not is_thinking and was_thinking and text: + click.echo(click.style("\n─────────────────────\n", dim=True)) + was_thinking = False if disable_newlines: - res[1] = res[1].replace("\n", " ") - click.echo(res[1], nl=False) + text = text.replace("\n", " ") + if is_thinking: + click.echo(click.style(text, dim=True), nl=False) + else: + click.echo(text, nl=False) except StopIteration: break diff --git a/src/regolo/client/regolo_client.py b/src/regolo/client/regolo_client.py index 0bb2489..f40e39e 100644 --- a/src/regolo/client/regolo_client.py +++ b/src/regolo/client/regolo_client.py @@ -227,7 +227,7 @@ def create_stream_generator(client: httpx.Client, try: # Repair and parse the JSON chunk data_chunk = json.loads(json_repair.repair_json(decoded_line)) - except (Exception,): + except Exception: continue if full_output: @@ -419,22 +419,31 @@ def static_chat_completions(messages: Conversation | List[Dict[str, str]], :return for stream=False, full_output=True: Tuple, which consists of role and content of response. """ + _state = {"in_reasoning": False} + def handle_search_text_chat_completions(data: dict) -> Optional[tuple[Role, Content]]: """ Internal method, describes how RegoloClient.create_stream_generator() should handle - output from chat_completions. + output from chat_completions. Reasoning tokens are rendered separately in the CLI (dim + "Thinking..." header). """ + def resolve(delta: dict) -> tuple[Role, Content]: + content = delta.get("content") + reasoning = delta.get("reasoning_content") + if content: + _state["in_reasoning"] = False + return "", content + elif reasoning: + _state["in_reasoning"] = True + return "thinking", reasoning + return "", "" + if isinstance(data, dict): delta = data.get("choices", [{}])[0].get("delta", {}) - out_role: Role = delta.get("role", "") - out_content: Content = delta.get("content", "") - return out_role, out_content + return resolve(delta) elif isinstance(data, list): for element in data: delta = element.get("choices", [{}])[0].get("delta", {}) - out_role: Role = delta.get("role", "") - out_content: Content = delta.get("content", "") - return out_role, out_content + return resolve(delta) return None # Use the default API key if not provided