Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ cp env.template .env
# Edit .env and set:
# - LLM_API_KEY (for memory extraction)
# - VECTORIZE_API_KEY (for embedding/rerank)
#
# Supported LLM backends: OpenAI, Anthropic Claude, Google Gemini,
# MiniMax, Azure OpenAI, Ollama, or any OpenAI-compatible API.
# See src/config/llm_backends.yaml for full configuration details.

# 5. Start server
uv run python src/run.py
Expand Down
4 changes: 4 additions & 0 deletions README.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ cp env.template .env
# 编辑 .env 并设置:
# - LLM_API_KEY(用于记忆提取)
# - VECTORIZE_API_KEY(用于向量化 / rerank)
#
# 支持的 LLM 后端:OpenAI、Anthropic Claude、Google Gemini、
# MiniMax、Azure OpenAI、Ollama 或任何 OpenAI 兼容 API。
# 详细配置请参阅 src/config/llm_backends.yaml。

# 5. 启动服务
uv run python src/run.py
Expand Down
6 changes: 6 additions & 0 deletions env.template
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ LLM_MAX_TOKENS=32768
# When using Qwen3 via OpenRouter, consider setting to "cerebras"
# LLM_OPENROUTER_PROVIDER=cerebras

# ===================
# MiniMax Configuration (optional, for using MiniMax as LLM backend)
# ===================

# MINIMAX_API_KEY=your-minimax-api-key-here

# ===================
# Vectorize (Embedding) Service Configuration
# ===================
Expand Down
15 changes: 15 additions & 0 deletions src/config/llm_backends.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,21 @@ llm_backends:
timeout: 600 # Increase to 10 minutes, suitable for time-consuming tasks like paper information extraction
max_retries: 3

# MiniMax configuration
minimax:
name: "MiniMax"
provider: "minimax"
base_url: "https://api.minimax.io/v1"
api_key: ""
models:
- "MiniMax-M2.7"
- "MiniMax-M2.7-highspeed"
- "MiniMax-M2.5"
- "MiniMax-M2.5-highspeed"
model: "MiniMax-M2.7"
timeout: 600
max_retries: 3

# Local Ollama configuration
ollama:
name: "Ollama Local"
Expand Down
115 changes: 115 additions & 0 deletions src/core/component/llm/llm_adapter/minimax_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import re
from typing import Dict, Any, List, Union, AsyncGenerator
import os
import openai
from core.component.llm.llm_adapter.completion import (
ChatCompletionRequest,
ChatCompletionResponse,
)
from core.component.llm.llm_adapter.llm_backend_adapter import LLMBackendAdapter
from core.constants.errors import ErrorMessage


class MiniMaxAdapter(LLMBackendAdapter):
    """MiniMax API adapter using an OpenAI-compatible interface.

    MiniMax exposes an OpenAI-compatible API at https://api.minimax.io/v1.
    This adapter handles MiniMax-specific behaviors:
    - Temperature clamping to the [0.01, 1.0] range
    - Stripping <think>...</think> tags from reasoning model responses,
      including tags split across streaming chunk boundaries
    - Auto-detection of the MINIMAX_API_KEY environment variable
    """

    # MiniMax historically rejected temperature <= 0, so clamp into a range
    # accepted by all model versions.
    # NOTE(review): the 1.0 upper bound is assumed from MiniMax docs --
    # confirm it still applies to newer models.
    MIN_TEMPERATURE = 0.01
    MAX_TEMPERATURE = 1.0

    # Reasoning models wrap chain-of-thought in <think>...</think> blocks.
    _THINK_OPEN = "<think>"
    _THINK_CLOSE = "</think>"

    # Pattern used on complete (non-streaming) responses only; a regex
    # cannot match a tag that is split across streaming chunks.
    _THINK_TAG_PATTERN = re.compile(
        r"<think>.*?</think>\s*", flags=re.DOTALL
    )

    def __init__(self, config: Dict[str, Any]):
        """Initialize the adapter.

        Args:
            config: Backend configuration. Recognized keys: ``api_key``
                (falls back to the MINIMAX_API_KEY environment variable),
                ``base_url``, ``timeout`` and ``models``.

        Raises:
            ValueError: If no API key is configured.
        """
        self.config = config
        self.api_key = config.get("api_key") or os.getenv("MINIMAX_API_KEY")
        self.base_url = config.get(
            "base_url", "https://api.minimax.io/v1"
        )
        self.timeout = config.get("timeout", 600)

        if not self.api_key:
            raise ValueError(ErrorMessage.INVALID_PARAMETER.value)

        self.client = openai.AsyncOpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
            timeout=self.timeout,
        )

    @classmethod
    def _clamp_temperature(cls, temperature: float | None) -> float | None:
        """Clamp temperature to MiniMax's accepted range; None passes through."""
        if temperature is None:
            return None
        return max(cls.MIN_TEMPERATURE, min(cls.MAX_TEMPERATURE, temperature))

    @classmethod
    def _strip_think_tags(cls, text: str) -> str:
        """Strip complete <think>...</think> blocks from model output."""
        return cls._THINK_TAG_PATTERN.sub("", text).strip()

    @staticmethod
    def _partial_tag_suffix_len(text: str, tag: str) -> int:
        """Return the length of the longest proper prefix of *tag* that *text* ends with.

        Used by the streaming filter to hold back a text tail that might be
        the beginning of a tag split across two chunks.
        """
        for size in range(min(len(tag) - 1, len(text)), 0, -1):
            if text.endswith(tag[:size]):
                return size
        return 0

    @classmethod
    def _drain_buffer(cls, buf: str, in_think: bool) -> tuple[str, str, bool]:
        """Consume *buf*, dropping <think>...</think> content.

        Returns:
            (text_to_emit, remaining_buffer, in_think). The remaining buffer
            is a suffix that could be the start of a tag split across chunk
            boundaries; it is retained for the next call.
        """
        out = []
        while buf:
            if in_think:
                end = buf.find(cls._THINK_CLOSE)
                if end == -1:
                    # Drop think-content; keep only a possible partial close tag.
                    keep = cls._partial_tag_suffix_len(buf, cls._THINK_CLOSE)
                    buf = buf[len(buf) - keep:]
                    break
                buf = buf[end + len(cls._THINK_CLOSE):]
                in_think = False
            else:
                start = buf.find(cls._THINK_OPEN)
                if start == -1:
                    # Emit everything except a possible partial open tag.
                    keep = cls._partial_tag_suffix_len(buf, cls._THINK_OPEN)
                    cut = len(buf) - keep
                    if cut:
                        out.append(buf[:cut])
                    buf = buf[cut:]
                    break
                if start:
                    out.append(buf[:start])
                buf = buf[start + len(cls._THINK_OPEN):]
                in_think = True
        return "".join(out), buf, in_think

    async def _stream_completion(
        self, final_params: Dict[str, Any]
    ) -> AsyncGenerator[str, None]:
        """Yield streamed content fragments with <think> blocks filtered out.

        The filter is stateful: open/close tags may arrive split across
        chunk boundaries, so a small tail of text is buffered until it can
        be classified as tag or plain content. Unlike a per-chunk regex
        strip, this preserves inter-chunk whitespace.

        Raises:
            RuntimeError: If the upstream API call or stream iteration fails.
        """
        try:
            response_stream = await self.client.chat.completions.create(
                **final_params
            )
            in_think = False
            buf = ""
            async for chunk in response_stream:
                # Some providers emit keep-alive chunks with no choices.
                if not chunk.choices:
                    continue
                delta = getattr(chunk.choices[0].delta, "content", None)
                if not delta:
                    continue
                buf += delta
                emitted, buf, in_think = self._drain_buffer(buf, in_think)
                if emitted:
                    yield emitted
            # Flush: a leftover tail outside a think block is real content
            # (it was only held back as a potential partial tag).
            if buf and not in_think:
                yield buf
        except Exception as e:
            # Chain the cause so the upstream traceback is preserved.
            raise RuntimeError(
                f"MiniMax chat completion request failed: {e}"
            ) from e

    async def chat_completion(
        self, request: ChatCompletionRequest
    ) -> Union[ChatCompletionResponse, AsyncGenerator[str, None]]:
        """Perform chat completion via the MiniMax OpenAI-compatible API.

        Args:
            request: The chat completion request; ``model`` is required.

        Returns:
            A ChatCompletionResponse for non-streaming requests, or an async
            generator yielding content fragments when streaming.

        Raises:
            ValueError: If the request has no model set.
            RuntimeError: If the upstream API call fails.
        """
        if not request.model:
            raise ValueError(ErrorMessage.INVALID_PARAMETER.value)

        params = request.to_dict()
        client_params = {
            "model": params.get("model"),
            "messages": params.get("messages"),
            "temperature": self._clamp_temperature(params.get("temperature")),
            "max_tokens": params.get("max_tokens"),
            "top_p": params.get("top_p"),
            "frequency_penalty": params.get("frequency_penalty"),
            "presence_penalty": params.get("presence_penalty"),
            "stream": params.get("stream", False),
        }
        # Drop unset parameters so the API applies its own defaults.
        final_params = {k: v for k, v in client_params.items() if v is not None}

        if final_params.get("stream"):
            # Errors must be wrapped inside the generator itself: its body
            # only runs once the caller iterates, after this method returns.
            return self._stream_completion(final_params)

        try:
            response = await self.client.chat.completions.create(
                **final_params
            )
            resp_dict = response.model_dump()
            # Strip think tags from the complete non-streaming content.
            for choice in resp_dict.get("choices", []):
                msg = choice.get("message", {})
                if msg.get("content"):
                    msg["content"] = self._strip_think_tags(msg["content"])
            return ChatCompletionResponse.from_dict(resp_dict)
        except Exception as e:
            # Chain the cause so the upstream traceback is preserved.
            raise RuntimeError(
                f"MiniMax chat completion request failed: {e}"
            ) from e

    def get_available_models(self) -> List[str]:
        """Return the configured MiniMax model list (empty if unset)."""
        return self.config.get("models", [])
3 changes: 3 additions & 0 deletions src/core/component/openai_compatible_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from core.component.llm.llm_adapter.openai_adapter import OpenAIAdapter
from core.component.llm.llm_adapter.anthropic_adapter import AnthropicAdapter
from core.component.llm.llm_adapter.gemini_adapter import GeminiAdapter
from core.component.llm.llm_adapter.minimax_adapter import MiniMaxAdapter

logger = get_logger(__name__)

Expand Down Expand Up @@ -75,6 +76,8 @@ async def _get_adapter(self, backend_name: str) -> LLMBackendAdapter:
adapter = AnthropicAdapter(backend_config)
elif provider == "gemini":
adapter = GeminiAdapter(backend_config)
elif provider == "minimax":
adapter = MiniMaxAdapter(backend_config)
else:
raise ValueError(f"Unsupported provider type: {provider}")

Expand Down
Loading