From 0bbccff121500e7a2325a5cc0731ef78c6720317 Mon Sep 17 00:00:00 2001
From: drunkduckdrown <70945319+drunkduckdrown@users.noreply.github.com>
Date: Fri, 20 Mar 2026 01:09:53 +0800
Subject: [PATCH 1/3] =?UTF-8?q?=E5=8E=BB=E9=99=A4Grok=E8=BF=94=E5=9B=9E?=
 =?UTF-8?q?=E7=9A=84<think>=E6=A0=87=E7=AD=BE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/grok_search/server.py  |  5 +++--
 src/grok_search/sources.py | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/src/grok_search/server.py b/src/grok_search/server.py
index 7754216..d46a5ae 100644
--- a/src/grok_search/server.py
+++ b/src/grok_search/server.py
@@ -15,13 +15,13 @@
     from grok_search.providers.grok import GrokSearchProvider
     from grok_search.logger import log_info
     from grok_search.config import config
-    from grok_search.sources import SourcesCache, merge_sources, new_session_id, split_answer_and_sources
+    from grok_search.sources import SourcesCache, merge_sources, new_session_id, sanitize_answer_text, split_answer_and_sources
     from grok_search.planning import engine as planning_engine, _split_csv
 except ImportError:
     from .providers.grok import GrokSearchProvider
     from .logger import log_info
     from .config import config
-    from .sources import SourcesCache, merge_sources, new_session_id, split_answer_and_sources
+    from .sources import SourcesCache, merge_sources, new_session_id, sanitize_answer_text, split_answer_and_sources
     from .planning import engine as planning_engine, _split_csv
 
 import asyncio
@@ -203,6 +203,7 @@ async def _safe_firecrawl() -> list[dict] | None:
         firecrawl_results = gathered[idx]
 
     answer, grok_sources = split_answer_and_sources(grok_result)
+    answer = sanitize_answer_text(answer)
     extra = _extra_results_to_sources(tavily_results, firecrawl_results)
     all_sources = merge_sources(grok_sources, extra)
 
diff --git a/src/grok_search/sources.py b/src/grok_search/sources.py
index 63386e2..6df7ad1 100644
--- a/src/grok_search/sources.py
+++ b/src/grok_search/sources.py
@@ -23,6 +23,10 @@
 _SOURCES_FUNCTION_PATTERN = re.compile(
     r"(?im)(^|\n)\s*(sources|source|citations|citation|references|reference|citation_card|source_cards|source_card)\s*\("
 )
+_THINK_TAG_PATTERN = re.compile(
+    r"<think\b[^>]*>.*?</think>",
+    re.IGNORECASE | re.DOTALL,
+)
 
 
 def new_session_id() -> str:
@@ -67,6 +71,16 @@ def merge_sources(*source_lists: list[dict]) -> list[dict]:
     return merged
 
 
+def sanitize_answer_text(text: str) -> str:
+    """Strip <think>...</think> blocks (the tags and the reasoning inside them) from answer text, then collapse runs of 3+ newlines to two and trim surrounding whitespace."""
+    raw = (text or "").strip()
+    if not raw:
+        return ""
+    cleaned = _THINK_TAG_PATTERN.sub("", raw)
+    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned).strip()
+    return cleaned
+
+
 def split_answer_and_sources(text: str) -> tuple[str, list[dict]]:
     raw = (text or "").strip()
     if not raw:

From e06808f1b78a1d5ecda418c93dc0f1be3905b005 Mon Sep 17 00:00:00 2001
From: drunkduckdrown <70945319+drunkduckdrown@users.noreply.github.com>
Date: Fri, 20 Mar 2026 01:39:11 +0800
Subject: [PATCH 2/3] =?UTF-8?q?=E5=BC=B1=E5=8C=96search=5Fprompt=EF=BC=8C?=
 =?UTF-8?q?=E9=81=BF=E5=85=8D=E8=A7=A6=E5=8F=91Grok=E8=B6=8A=E7=8B=B1?=
 =?UTF-8?q?=E6=8A=B5=E6=8A=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/grok_search/utils.py | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/grok_search/utils.py b/src/grok_search/utils.py
index eedbd0f..a640fd0 100644
--- a/src/grok_search/utils.py
+++ b/src/grok_search/utils.py
@@ -209,33 +209,33 @@ def format_search_results(results: List[SearchResult]) -> str:
 search_prompt = """
 # Core Instruction
 
-1. User needs may be vague. Think divergently, infer intent from multiple angles, and leverage full conversation context to progressively clarify their true needs.
-2. **Breadth-First Search**—Approach problems from multiple dimensions. Brainstorm 5+ perspectives and execute parallel searches for each. Consult as many high-quality sources as possible before responding.
-3. **Depth-First Search**—After broad exploration, select ≥2 most relevant perspectives for deep investigation into specialized knowledge.
-4. **Evidence-Based Reasoning & Traceable Sources**—Every claim must be followed by a citation (`citation_card` format). More credible sources strengthen arguments. If no references exist, remain silent.
-5. Before responding, ensure full execution of Steps 1–4.
+1. User needs may be vague. Infer likely intent from multiple angles and use the conversation context to clarify what information is most helpful.
+2. Start with broad exploration. For open-ended, ambiguous, or high-impact questions, examine as many relevant perspectives as practical before finalizing the answer.
+3. After broad exploration, investigate the most relevant perspectives in greater depth to surface specialized knowledge and stronger evidence.
+4. Keep the answer evidence-based and traceable. Use `citation_card` format whenever citations are available, especially for important factual or source-dependent claims.
+5. Before responding, gather enough evidence to support the main conclusion and identify any important uncertainty or disagreement between sources.
 
 ---
 
 # Search Instruction
 
-1. Think carefully before responding—anticipate the user’s true intent to ensure precision.
-2. Verify every claim rigorously to avoid misinformation.
-3. Follow problem logic—dig deeper until clues are exhaustively clear. If a question seems simple, still infer broader intent and search accordingly. Use multiple parallel tool calls per query and ensure answers are well-sourced.
-4. Search in English first (prioritizing English resources for volume/quality), but switch to Chinese if context demands.
-5. Prioritize authoritative sources: Wikipedia, academic databases, books, reputable media/journalism.
-6. Favor sharing in-depth, specialized knowledge over generic or common-sense content.
+1. Analyze the request carefully and aim for the user's most likely intent.
+2. Verify important factual claims before presenting them.
+3. Follow the logic of the question and continue searching until the main answer is well-supported. Even when a question looks simple, check whether broader context or likely user intent requires wider exploration.
+4. Search in English first when appropriate, especially for breadth and source quality, but use Chinese sources when the topic, user context, or source quality makes them preferable.
+5. Prioritize authoritative and trustworthy sources such as official websites, academic databases, books, major reference works, and reputable journalism.
+6. Prefer substantive, specialized, and source-backed information over generic or obvious statements.
 
 ---
 
 # Output Style
 
-0. **Be direct—no unnecessary follow-ups**.
-1. Lead with the **most probable solution** before detailed analysis.
-2. **Define every technical term** in plain language (annotate post-paragraph).
-3. Explain expertise **simply yet profoundly**.
-4. **Respect facts and search results—use statistical rigor to discern truth**.
-5. **Every sentence must cite sources** (`citation_card`). More references = stronger credibility. Silence if uncited.
-6. Expand on key concepts—after proposing solutions, **use real-world analogies** to demystify technical terms.
-7. **Strictly format outputs in polished Markdown** (LaTeX for formulas, code blocks for scripts, etc.).
+0. Be direct and avoid unnecessary follow-up questions unless clarification is essential.
+1. Lead with the most probable answer or solution before detailed analysis.
+2. Define technical terms in plain language when helpful.
+3. Explain specialized knowledge clearly and accessibly without oversimplifying.
+4. Stay grounded in facts and search results, and distinguish clearly between strong evidence, weaker evidence, and uncertainty.
+5. Use `citation_card` format for sourced claims whenever possible, with priority on important factual statements.
+6. Expand on key ideas when useful, and use examples or analogies to make difficult concepts easier to understand.
+7. Format the response in polished Markdown, using LaTeX for formulas and code blocks for scripts when appropriate.
 """

From a49d6a145c66d417581c70339484ad75deed1dcc Mon Sep 17 00:00:00 2001
From: drunkduckdrown <70945319+drunkduckdrown@users.noreply.github.com>
Date: Fri, 20 Mar 2026 01:48:41 +0800
Subject: [PATCH 3/3] Revert accidental changes from wrong branch

---
 src/grok_search/utils.py | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/grok_search/utils.py b/src/grok_search/utils.py
index a640fd0..eedbd0f 100644
--- a/src/grok_search/utils.py
+++ b/src/grok_search/utils.py
@@ -209,33 +209,33 @@ def format_search_results(results: List[SearchResult]) -> str:
 search_prompt = """
 # Core Instruction
 
-1. User needs may be vague. Infer likely intent from multiple angles and use the conversation context to clarify what information is most helpful.
-2. Start with broad exploration. For open-ended, ambiguous, or high-impact questions, examine as many relevant perspectives as practical before finalizing the answer.
-3. After broad exploration, investigate the most relevant perspectives in greater depth to surface specialized knowledge and stronger evidence.
-4. Keep the answer evidence-based and traceable. Use `citation_card` format whenever citations are available, especially for important factual or source-dependent claims.
-5. Before responding, gather enough evidence to support the main conclusion and identify any important uncertainty or disagreement between sources.
+1. User needs may be vague. Think divergently, infer intent from multiple angles, and leverage full conversation context to progressively clarify their true needs.
+2. **Breadth-First Search**—Approach problems from multiple dimensions. Brainstorm 5+ perspectives and execute parallel searches for each. Consult as many high-quality sources as possible before responding.
+3. **Depth-First Search**—After broad exploration, select ≥2 most relevant perspectives for deep investigation into specialized knowledge.
+4. **Evidence-Based Reasoning & Traceable Sources**—Every claim must be followed by a citation (`citation_card` format). More credible sources strengthen arguments. If no references exist, remain silent.
+5. Before responding, ensure full execution of Steps 1–4.
 
 ---
 
 # Search Instruction
 
-1. Analyze the request carefully and aim for the user's most likely intent.
-2. Verify important factual claims before presenting them.
-3. Follow the logic of the question and continue searching until the main answer is well-supported. Even when a question looks simple, check whether broader context or likely user intent requires wider exploration.
-4. Search in English first when appropriate, especially for breadth and source quality, but use Chinese sources when the topic, user context, or source quality makes them preferable.
-5. Prioritize authoritative and trustworthy sources such as official websites, academic databases, books, major reference works, and reputable journalism.
-6. Prefer substantive, specialized, and source-backed information over generic or obvious statements.
+1. Think carefully before responding—anticipate the user’s true intent to ensure precision.
+2. Verify every claim rigorously to avoid misinformation.
+3. Follow problem logic—dig deeper until clues are exhaustively clear. If a question seems simple, still infer broader intent and search accordingly. Use multiple parallel tool calls per query and ensure answers are well-sourced.
+4. Search in English first (prioritizing English resources for volume/quality), but switch to Chinese if context demands.
+5. Prioritize authoritative sources: Wikipedia, academic databases, books, reputable media/journalism.
+6. Favor sharing in-depth, specialized knowledge over generic or common-sense content.
 
 ---
 
 # Output Style
 
-0. Be direct and avoid unnecessary follow-up questions unless clarification is essential.
-1. Lead with the most probable answer or solution before detailed analysis.
-2. Define technical terms in plain language when helpful.
-3. Explain specialized knowledge clearly and accessibly without oversimplifying.
-4. Stay grounded in facts and search results, and distinguish clearly between strong evidence, weaker evidence, and uncertainty.
-5. Use `citation_card` format for sourced claims whenever possible, with priority on important factual statements.
-6. Expand on key ideas when useful, and use examples or analogies to make difficult concepts easier to understand.
-7. Format the response in polished Markdown, using LaTeX for formulas and code blocks for scripts when appropriate.
+0. **Be direct—no unnecessary follow-ups**.
+1. Lead with the **most probable solution** before detailed analysis.
+2. **Define every technical term** in plain language (annotate post-paragraph).
+3. Explain expertise **simply yet profoundly**.
+4. **Respect facts and search results—use statistical rigor to discern truth**.
+5. **Every sentence must cite sources** (`citation_card`). More references = stronger credibility. Silence if uncited.
+6. Expand on key concepts—after proposing solutions, **use real-world analogies** to demystify technical terms.
+7. **Strictly format outputs in polished Markdown** (LaTeX for formulas, code blocks for scripts, etc.).
 """