From 0bbccff121500e7a2325a5cc0731ef78c6720317 Mon Sep 17 00:00:00 2001 From: drunkduckdrown <70945319+drunkduckdrown@users.noreply.github.com> Date: Fri, 20 Mar 2026 01:09:53 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=8E=BB=E9=99=A4Grok=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E7=9A=84=E6=A0=87=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/grok_search/server.py | 5 +++-- src/grok_search/sources.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/grok_search/server.py b/src/grok_search/server.py index 7754216..d46a5ae 100644 --- a/src/grok_search/server.py +++ b/src/grok_search/server.py @@ -15,13 +15,13 @@ from grok_search.providers.grok import GrokSearchProvider from grok_search.logger import log_info from grok_search.config import config - from grok_search.sources import SourcesCache, merge_sources, new_session_id, split_answer_and_sources + from grok_search.sources import SourcesCache, merge_sources, new_session_id, sanitize_answer_text, split_answer_and_sources from grok_search.planning import engine as planning_engine, _split_csv except ImportError: from .providers.grok import GrokSearchProvider from .logger import log_info from .config import config - from .sources import SourcesCache, merge_sources, new_session_id, split_answer_and_sources + from .sources import SourcesCache, merge_sources, new_session_id, sanitize_answer_text, split_answer_and_sources from .planning import engine as planning_engine, _split_csv import asyncio @@ -203,6 +203,7 @@ async def _safe_firecrawl() -> list[dict] | None: firecrawl_results = gathered[idx] answer, grok_sources = split_answer_and_sources(grok_result) + answer = sanitize_answer_text(answer) extra = _extra_results_to_sources(tavily_results, firecrawl_results) all_sources = merge_sources(grok_sources, extra) diff --git a/src/grok_search/sources.py b/src/grok_search/sources.py index 63386e2..6df7ad1 100644 --- 
a/src/grok_search/sources.py +++ b/src/grok_search/sources.py @@ -23,6 +23,10 @@ _SOURCES_FUNCTION_PATTERN = re.compile( r"(?im)(^|\n)\s*(sources|source|citations|citation|references|reference|citation_card|source_cards|source_card)\s*\(" ) +_THINK_TAG_PATTERN = re.compile( + r"<think[^>]*>.*?</think>", + re.IGNORECASE | re.DOTALL, +) def new_session_id() -> str: @@ -67,6 +71,16 @@ def merge_sources(*source_lists: list[dict]) -> list[dict]: return merged +def sanitize_answer_text(text: str) -> str: + """Remove model reasoning tags from answer text while preserving content.""" + raw = (text or "").strip() + if not raw: + return "" + cleaned = _THINK_TAG_PATTERN.sub("", raw) + cleaned = re.sub(r"\n{3,}", "\n\n", cleaned).strip() + return cleaned + + def split_answer_and_sources(text: str) -> tuple[str, list[dict]]: raw = (text or "").strip() if not raw: From e06808f1b78a1d5ecda418c93dc0f1be3905b005 Mon Sep 17 00:00:00 2001 From: drunkduckdrown <70945319+drunkduckdrown@users.noreply.github.com> Date: Fri, 20 Mar 2026 01:39:11 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=BC=B1=E5=8C=96search=5Fprompt=EF=BC=8C?= =?UTF-8?q?=E9=81=BF=E5=85=8D=E8=A7=A6=E5=8F=91Grok=E8=B6=8A=E7=8B=B1?= =?UTF-8?q?=E6=8A=B5=E6=8A=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/grok_search/utils.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/grok_search/utils.py b/src/grok_search/utils.py index eedbd0f..a640fd0 100644 --- a/src/grok_search/utils.py +++ b/src/grok_search/utils.py @@ -209,33 +209,33 @@ def format_search_results(results: List[SearchResult]) -> str: search_prompt = """ # Core Instruction -1. User needs may be vague. Think divergently, infer intent from multiple angles, and leverage full conversation context to progressively clarify their true needs. -2. **Breadth-First Search**—Approach problems from multiple dimensions. 
Brainstorm 5+ perspectives and execute parallel searches for each. Consult as many high-quality sources as possible before responding. -3. **Depth-First Search**—After broad exploration, select ≥2 most relevant perspectives for deep investigation into specialized knowledge. -4. **Evidence-Based Reasoning & Traceable Sources**—Every claim must be followed by a citation (`citation_card` format). More credible sources strengthen arguments. If no references exist, remain silent. -5. Before responding, ensure full execution of Steps 1–4. +1. User needs may be vague. Infer likely intent from multiple angles and use the conversation context to clarify what information is most helpful. +2. Start with broad exploration. For open-ended, ambiguous, or high-impact questions, examine as many relevant perspectives as practical before finalizing the answer. +3. After broad exploration, investigate the most relevant perspectives in greater depth to surface specialized knowledge and stronger evidence. +4. Keep the answer evidence-based and traceable. Use `citation_card` format whenever citations are available, especially for important factual or source-dependent claims. +5. Before responding, gather enough evidence to support the main conclusion and identify any important uncertainty or disagreement between sources. --- # Search Instruction -1. Think carefully before responding—anticipate the user’s true intent to ensure precision. -2. Verify every claim rigorously to avoid misinformation. -3. Follow problem logic—dig deeper until clues are exhaustively clear. If a question seems simple, still infer broader intent and search accordingly. Use multiple parallel tool calls per query and ensure answers are well-sourced. -4. Search in English first (prioritizing English resources for volume/quality), but switch to Chinese if context demands. -5. Prioritize authoritative sources: Wikipedia, academic databases, books, reputable media/journalism. -6. 
Favor sharing in-depth, specialized knowledge over generic or common-sense content. +1. Analyze the request carefully and aim for the user's most likely intent. +2. Verify important factual claims before presenting them. +3. Follow the logic of the question and continue searching until the main answer is well-supported. Even when a question looks simple, check whether broader context or likely user intent requires wider exploration. +4. Search in English first when appropriate, especially for breadth and source quality, but use Chinese sources when the topic, user context, or source quality makes them preferable. +5. Prioritize authoritative and trustworthy sources such as official websites, academic databases, books, major reference works, and reputable journalism. +6. Prefer substantive, specialized, and source-backed information over generic or obvious statements. --- # Output Style -0. **Be direct—no unnecessary follow-ups**. -1. Lead with the **most probable solution** before detailed analysis. -2. **Define every technical term** in plain language (annotate post-paragraph). -3. Explain expertise **simply yet profoundly**. -4. **Respect facts and search results—use statistical rigor to discern truth**. -5. **Every sentence must cite sources** (`citation_card`). More references = stronger credibility. Silence if uncited. -6. Expand on key concepts—after proposing solutions, **use real-world analogies** to demystify technical terms. -7. **Strictly format outputs in polished Markdown** (LaTeX for formulas, code blocks for scripts, etc.). +0. Be direct and avoid unnecessary follow-up questions unless clarification is essential. +1. Lead with the most probable answer or solution before detailed analysis. +2. Define technical terms in plain language when helpful. +3. Explain specialized knowledge clearly and accessibly without oversimplifying. +4. 
Stay grounded in facts and search results, and distinguish clearly between strong evidence, weaker evidence, and uncertainty. +5. Use `citation_card` format for sourced claims whenever possible, with priority on important factual statements. +6. Expand on key ideas when useful, and use examples or analogies to make difficult concepts easier to understand. +7. Format the response in polished Markdown, using LaTeX for formulas and code blocks for scripts when appropriate. """ From a49d6a145c66d417581c70339484ad75deed1dcc Mon Sep 17 00:00:00 2001 From: drunkduckdrown <70945319+drunkduckdrown@users.noreply.github.com> Date: Fri, 20 Mar 2026 01:48:41 +0800 Subject: [PATCH 3/3] Revert accidental changes from wrong branch --- src/grok_search/utils.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/grok_search/utils.py b/src/grok_search/utils.py index a640fd0..eedbd0f 100644 --- a/src/grok_search/utils.py +++ b/src/grok_search/utils.py @@ -209,33 +209,33 @@ def format_search_results(results: List[SearchResult]) -> str: search_prompt = """ # Core Instruction -1. User needs may be vague. Infer likely intent from multiple angles and use the conversation context to clarify what information is most helpful. -2. Start with broad exploration. For open-ended, ambiguous, or high-impact questions, examine as many relevant perspectives as practical before finalizing the answer. -3. After broad exploration, investigate the most relevant perspectives in greater depth to surface specialized knowledge and stronger evidence. -4. Keep the answer evidence-based and traceable. Use `citation_card` format whenever citations are available, especially for important factual or source-dependent claims. -5. Before responding, gather enough evidence to support the main conclusion and identify any important uncertainty or disagreement between sources. +1. User needs may be vague. 
Think divergently, infer intent from multiple angles, and leverage full conversation context to progressively clarify their true needs. +2. **Breadth-First Search**—Approach problems from multiple dimensions. Brainstorm 5+ perspectives and execute parallel searches for each. Consult as many high-quality sources as possible before responding. +3. **Depth-First Search**—After broad exploration, select ≥2 most relevant perspectives for deep investigation into specialized knowledge. +4. **Evidence-Based Reasoning & Traceable Sources**—Every claim must be followed by a citation (`citation_card` format). More credible sources strengthen arguments. If no references exist, remain silent. +5. Before responding, ensure full execution of Steps 1–4. --- # Search Instruction -1. Analyze the request carefully and aim for the user's most likely intent. -2. Verify important factual claims before presenting them. -3. Follow the logic of the question and continue searching until the main answer is well-supported. Even when a question looks simple, check whether broader context or likely user intent requires wider exploration. -4. Search in English first when appropriate, especially for breadth and source quality, but use Chinese sources when the topic, user context, or source quality makes them preferable. -5. Prioritize authoritative and trustworthy sources such as official websites, academic databases, books, major reference works, and reputable journalism. -6. Prefer substantive, specialized, and source-backed information over generic or obvious statements. +1. Think carefully before responding—anticipate the user’s true intent to ensure precision. +2. Verify every claim rigorously to avoid misinformation. +3. Follow problem logic—dig deeper until clues are exhaustively clear. If a question seems simple, still infer broader intent and search accordingly. Use multiple parallel tool calls per query and ensure answers are well-sourced. +4. 
Search in English first (prioritizing English resources for volume/quality), but switch to Chinese if context demands. +5. Prioritize authoritative sources: Wikipedia, academic databases, books, reputable media/journalism. +6. Favor sharing in-depth, specialized knowledge over generic or common-sense content. --- # Output Style -0. Be direct and avoid unnecessary follow-up questions unless clarification is essential. -1. Lead with the most probable answer or solution before detailed analysis. -2. Define technical terms in plain language when helpful. -3. Explain specialized knowledge clearly and accessibly without oversimplifying. -4. Stay grounded in facts and search results, and distinguish clearly between strong evidence, weaker evidence, and uncertainty. -5. Use `citation_card` format for sourced claims whenever possible, with priority on important factual statements. -6. Expand on key ideas when useful, and use examples or analogies to make difficult concepts easier to understand. -7. Format the response in polished Markdown, using LaTeX for formulas and code blocks for scripts when appropriate. +0. **Be direct—no unnecessary follow-ups**. +1. Lead with the **most probable solution** before detailed analysis. +2. **Define every technical term** in plain language (annotate post-paragraph). +3. Explain expertise **simply yet profoundly**. +4. **Respect facts and search results—use statistical rigor to discern truth**. +5. **Every sentence must cite sources** (`citation_card`). More references = stronger credibility. Silence if uncited. +6. Expand on key concepts—after proposing solutions, **use real-world analogies** to demystify technical terms. +7. **Strictly format outputs in polished Markdown** (LaTeX for formulas, code blocks for scripts, etc.). """