regolo-ai · matteo-brandolino · Feb 28, 2026
diff --git a/src/regolo/cli.py b/src/regolo/cli.py
@@ -723,7 +723,8 @@ def get_available_models(api_key: str, model_type: str):
 @click.option('--api-key', required=False, help='The API key used to chat with Regolo.')
 @click.option('--disable-newlines', required=False, is_flag=True, default=False,
               help='Disable new lines, they will be replaced with space character')
-def chat(no_hide: bool, api_key: str, disable_newlines: bool):
+@click.option('--max-tokens', default=2048, help='Max tokens per response (default: 2048)')
+def chat(no_hide: bool, api_key: str, disable_newlines: bool, max_tokens: int):
     if not api_key:
         api_key = click.prompt("Insert your regolo API key", hide_input=not no_hide)
     available_models: list[dict] = regolo.RegoloClient.get_available_models(api_key, model_info=True)
@@ -767,17 +768,29 @@ def chat(no_hide: bool, api_key: str, disable_newlines: bool):
         if user_input == "/bye":
             exit(0)
         # get chat response and save in the client
-        response = client.run_chat(user_input, stream=True, full_output=False)
+        response = client.run_chat(user_input, stream=True, full_output=False, max_tokens=max_tokens)
 
         # print output
+        was_thinking = False
         while True:
             try:
                 res = next(response)
-                if res[0]:
-                    click.echo(res[0] + ":")
+                if res is None:
+                    continue
+                is_thinking = res[0] == "thinking"
+                text = res[1]
+                if is_thinking and not was_thinking:
+                    click.echo(click.style("Thinking...", dim=True, italic=True))
+                    was_thinking = True
+                elif not is_thinking and was_thinking and text:
+                    click.echo(click.style("\n─────────────────────\n", dim=True))
+                    was_thinking = False
                 if disable_newlines:
-                    res[1] = res[1].replace("\n", " ")
-                click.echo(res[1], nl=False)
+                    text = text.replace("\n", " ")
+                if is_thinking:
+                    click.echo(click.style(text, dim=True), nl=False)
+                else:
+                    click.echo(text, nl=False)
             except StopIteration:
                 break
 

diff --git a/src/regolo/client/regolo_client.py b/src/regolo/client/regolo_client.py
@@ -227,7 +227,7 @@ def create_stream_generator(client: httpx.Client,
                 try:
                     # Repair and parse the JSON chunk
                     data_chunk = json.loads(json_repair.repair_json(decoded_line))
-                except (Exception,):
+                except Exception:
                     continue
 
                 if full_output:
@@ -419,22 +419,31 @@ def static_chat_completions(messages: Conversation | List[Dict[str, str]],
         :return for stream=False, full_output=True: Tuple, which consists of role and content of response.
         """
 
+        _state = {"in_reasoning": False}
+
         def handle_search_text_chat_completions(data: dict) -> Optional[tuple[Role, Content]]:
             """
             Internal method, describes how RegoloClient.create_stream_generator() should handle
-            output from chat_completions.
+            output from chat_completions. Reasoning chunks are returned with the role "thinking" so that the CLI can render them apart from the answer (dimmed, under a "Thinking..." header).
             """
+            def resolve(delta: dict) -> tuple[Role, Content]:
+                content = delta.get("content")
+                reasoning = delta.get("reasoning_content")
+                if content:
+                    _state["in_reasoning"] = False
+                    return "", content
+                elif reasoning:
+                    _state["in_reasoning"] = True
+                    return "thinking", reasoning
+                return "", ""
+
             if isinstance(data, dict):
                 delta = data.get("choices", [{}])[0].get("delta", {})
-                out_role: Role = delta.get("role", "")
-                out_content: Content = delta.get("content", "")
-                return out_role, out_content
+                return resolve(delta)
             elif isinstance(data, list):
                 for element in data:
                     delta = element.get("choices", [{}])[0].get("delta", {})
-                    out_role: Role = delta.get("role", "")
-                    out_content: Content = delta.get("content", "")
-                    return out_role, out_content
+                    return resolve(delta)
             return None
 
         # Use the default API key if not provided