diff --git a/src/grok_search/server.py b/src/grok_search/server.py index 7754216..d46a5ae 100644 --- a/src/grok_search/server.py +++ b/src/grok_search/server.py @@ -15,13 +15,13 @@ from grok_search.providers.grok import GrokSearchProvider from grok_search.logger import log_info from grok_search.config import config - from grok_search.sources import SourcesCache, merge_sources, new_session_id, split_answer_and_sources + from grok_search.sources import SourcesCache, merge_sources, new_session_id, sanitize_answer_text, split_answer_and_sources from grok_search.planning import engine as planning_engine, _split_csv except ImportError: from .providers.grok import GrokSearchProvider from .logger import log_info from .config import config - from .sources import SourcesCache, merge_sources, new_session_id, split_answer_and_sources + from .sources import SourcesCache, merge_sources, new_session_id, sanitize_answer_text, split_answer_and_sources from .planning import engine as planning_engine, _split_csv import asyncio @@ -203,6 +203,7 @@ async def _safe_firecrawl() -> list[dict] | None: firecrawl_results = gathered[idx] answer, grok_sources = split_answer_and_sources(grok_result) + answer = sanitize_answer_text(answer) extra = _extra_results_to_sources(tavily_results, firecrawl_results) all_sources = merge_sources(grok_sources, extra) diff --git a/src/grok_search/sources.py b/src/grok_search/sources.py index 63386e2..6df7ad1 100644 --- a/src/grok_search/sources.py +++ b/src/grok_search/sources.py @@ -23,6 +23,10 @@ _SOURCES_FUNCTION_PATTERN = re.compile( r"(?im)(^|\n)\s*(sources|source|citations|citation|references|reference|citation_card|source_cards|source_card)\s*\(" ) +_THINK_TAG_PATTERN = re.compile( + r"<think[^>]*>.*?</think>", + re.IGNORECASE | re.DOTALL, +) def new_session_id() -> str: @@ -67,6 +71,16 @@ def merge_sources(*source_lists: list[dict]) -> list[dict]: return merged +def sanitize_answer_text(text: str) -> str: + """Remove model reasoning tags from answer text while 
preserving content.""" + raw = (text or "").strip() + if not raw: + return "" + cleaned = _THINK_TAG_PATTERN.sub("", raw) + cleaned = re.sub(r"\n{3,}", "\n\n", cleaned).strip() + return cleaned + + def split_answer_and_sources(text: str) -> tuple[str, list[dict]]: raw = (text or "").strip() if not raw: