diff --git a/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts b/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts index 70853f9..4f41076 100644 --- a/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts +++ b/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts @@ -4,8 +4,128 @@ export const ANTHROPIC_CONFIG: ProviderConfig = { id: 'anthropic', name: 'Anthropic', description: 'Claude models focused on safety and helpfulness', - defaultModel: 'claude-sonnet-4-5-20250929', + defaultModel: 'claude-sonnet-4-6', models: [ + { + id: 'claude-opus-4-6', + displayName: 'Claude Opus 4.6', + aliases: ['claude-opus-4-6'], + description: + "Claude Opus 4.6 is Anthropic's most capable model, featuring adaptive reasoning and exceptional performance across complex tasks. It delivers state-of-the-art results on coding, analysis, and agentic workflows, with a 1M token context window.", + lifecycle: 'production', + health: 'healthy', + releaseDate: '2026-01-20', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 5, + outputCostPer1mTokens: 25, + }, + tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-sonnet-4-6', + displayName: 'Claude Sonnet 4.6', + aliases: ['claude-sonnet-4-6'], + description: + "Claude Sonnet 4.6 is Anthropic's latest flagship Sonnet model, offering top-tier performance for agentic and coding workflows with a 1M token context window. It delivers state-of-the-art results on coding benchmarks and reasoning tasks.", + lifecycle: 'production', + health: 'healthy', + releaseDate: '2026-01-20', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_000_000, + maxOutputTokens: 64_000, + }, + cost: { + inputCostPer1mTokens: 3, + outputCostPer1mTokens: 15, + }, + tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-opus-4-5-20251101', + displayName: 'Claude Opus 4.5', + aliases: ['claude-opus-4-5'], + description: + "Claude Opus 4.5 is Anthropic's high-performance Opus model, excelling in complex reasoning, coding, and long-context tasks.", + lifecycle: 'production', + health: 'healthy', + releaseDate: '2025-11-01', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 64_000, + }, + cost: { + inputCostPer1mTokens: 5, + outputCostPer1mTokens: 25, + }, + tags: ['reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-opus-4-1-20250805', + displayName: 'Claude Opus 4.1', + aliases: ['claude-opus-4-1'], + description: + "Claude Opus 4.1 is a powerful Opus-tier model optimized for complex tasks requiring extended reasoning and high accuracy.", + lifecycle: 'production', + health: 'healthy', + releaseDate: '2025-08-05', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 32_000, + }, + cost: { + inputCostPer1mTokens: 15, + outputCostPer1mTokens: 75, + }, + tags: ['reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, { id: 'claude-sonnet-4-5-20250929', displayName: 'Claude Sonnet 4.5', @@ -36,6 +156,36 @@ export const ANTHROPIC_CONFIG: ProviderConfig = { }, tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], }, + { + id: 'claude-opus-4-20250514', + displayName: 'Claude Opus 4', + aliases: ['claude-opus-4-0'], + description: + "Claude Opus 4 is Anthropic's frontier Opus model from May 2025, designed for complex reasoning, advanced coding, and long-horizon agentic tasks.", + lifecycle: 'production', + health: 'healthy', + releaseDate: '2025-05-14', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 32_000, + }, + cost: { + inputCostPer1mTokens: 15, + outputCostPer1mTokens: 75, + }, + tags: ['reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, { id: 'claude-sonnet-4-20250514', displayName: 'Claude Sonnet 4', @@ -320,9 +470,11 @@ export const ANTHROPIC_CONFIG: ProviderConfig = { displayName: 'Claude 3 Haiku', description: "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.", - lifecycle: 'production', + lifecycle: 'deprecated', health: 'healthy', releaseDate: '2024-03-07', + deprecationDate: '2026-04-19', + replacementModels: ['claude-haiku-4-5-20251001'], capabilities: { supportsText: true, supportsImages: true, diff --git a/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts b/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts index 665e691..b7a3d18 100644 --- a/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts +++ b/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts @@ -6,6 +6,62 @@ export const CEREBRAS_CONFIG: ProviderConfig = { description: 'Cerebras AI models for high-performance language processing', defaultModel: 'gpt-oss-120b', models: [ + { + id: 'qwen-3-235b-a22b-instruct-2507', + displayName: 'Qwen3 235B A22B Instruct', + description: + 'Qwen3 235B A22B Instruct is a large mixture-of-experts reasoning model from Alibaba with 235B parameters and 22B active per forward pass. It delivers strong performance on reasoning, code generation, and agentic tasks.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_000, + maxOutputTokens: 16_000, + }, + cost: { + inputCostPer1mTokens: 0.8, + outputCostPer1mTokens: 1.6, + }, + tags: ['preview', 'general-purpose', 'reasoning'], + releaseDate: '2025-07-01', + lifecycle: 'preview', + }, + { + id: 'zai-glm-4.7', + displayName: 'Z.ai GLM 4.7', + description: + 'Z.ai GLM 4.7 is a large-scale language model from Zhipu AI with 355B parameters, delivering strong performance across reasoning, coding, and multilingual tasks.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_000, + maxOutputTokens: 16_000, + }, + cost: { + inputCostPer1mTokens: 0.39, + outputCostPer1mTokens: 1.75, + }, + tags: ['preview', 'general-purpose', 'reasoning'], + releaseDate: '2025-08-01', + lifecycle: 'preview', + }, { id: 'gpt-oss-120b', displayName: 'GPT-OSS 120B (Preview)', diff --git a/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts b/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts index c2a2d42..fdf0101 100644 --- a/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts +++ b/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts @@ -6,6 +6,90 @@ export const FIREWORKS_CONFIG: ProviderConfig = { description: 'Fireworks AI provides fast inference for various language models', defaultModel: 'accounts/fireworks/models/llama-v3p1-70b-instruct', models: [ + { + id: 'deepseek-v3p2', + internalModelId: 'accounts/fireworks/models/deepseek-v3p2', + displayName: 'DeepSeek V3.2', + description: + 'DeepSeek V3.2 is the latest iteration of the DeepSeek V3 series, a 685B-parameter mixture-of-experts model with improved reasoning, instruction following, and coding performance.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 163_840, + maxOutputTokens: 16_384, + }, + cost: { + inputCostPer1mTokens: 0.56, + outputCostPer1mTokens: 1.68, + }, + tags: ['recommended', 'general-purpose', 'coding'], + lifecycle: 'production', + }, + { + id: 'deepseek-v3p1', + internalModelId: 'accounts/fireworks/models/deepseek-v3p1', + displayName: 'DeepSeek V3.1', + description: + 'DeepSeek V3.1 is an updated version of the DeepSeek V3 series with enhanced reasoning and instruction-following capabilities.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 163_840, + maxOutputTokens: 16_384, + }, + cost: { + inputCostPer1mTokens: 0.56, + outputCostPer1mTokens: 1.68, + }, + tags: ['general-purpose', 'coding'], + lifecycle: 'production', + }, + { + id: 'kimi-k2-instruct-0905', + internalModelId: 'accounts/fireworks/models/kimi-k2-instruct-0905', + displayName: 'Kimi K2 Instruct 0905', + description: + 'Kimi K2 Instruct 0905 from Moonshot AI is an updated version optimized for agentic tasks, complex reasoning, and coding with a large 262k context window.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 262_144, + maxOutputTokens: 16_384, + }, + cost: { + inputCostPer1mTokens: 0.6, + outputCostPer1mTokens: 2.5, + }, + tags: ['general-purpose', 'reasoning', 'coding'], + lifecycle: 'production', + }, { id: 'gpt-oss-20b', internalModelId: 'accounts/fireworks/models/gpt-oss-20b', @@ -109,12 +193,12 @@ export const FIREWORKS_CONFIG: ProviderConfig = { supportsTemperature: true, }, limits: { - maxInputTokens: 160_000, + maxInputTokens: 163_840, maxOutputTokens: 16_384, }, cost: { - inputCostPer1mTokens: 0.9, - outputCostPer1mTokens: 0.9, + inputCostPer1mTokens: 0.56, + outputCostPer1mTokens: 1.68, }, tags: ['recommended', 'general-purpose'], lifecycle: 'production', diff --git a/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts b/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts index c5d8e21..350b4fe 100644 --- a/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts +++ b/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts @@ -6,6 +6,64 @@ export const GOOGLE_AI_CONFIG: ProviderConfig = { description: 'Gemini models from Google with multimodal capabilities', defaultModel: 'gemini-2.5-flash', models: [ + { + id: 'gemini-3.1-pro', + internalModelId: 'gemini-3.1-pro-preview', + displayName: 'Gemini 3.1 Pro', + description: + "Google's most advanced model with multimodal understanding, agentic capabilities, and state-of-the-art reasoning. Features tiered pricing for prompts over 200k tokens.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_048_576, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 2, + outputCostPer1mTokens: 12, + }, + tags: ['reasoning', 'agents', 'general-purpose', 'vision'], + releaseDate: '2026-01-15', + lifecycle: 'preview', + }, + { + id: 'gemini-3.1-flash-lite', + internalModelId: 'gemini-3.1-flash-lite-preview', + displayName: 'Gemini 3.1 Flash Lite', + description: + "Google's most cost-effective Gemini 3.1 model, optimized for high-volume and latency-sensitive use cases with strong reasoning capabilities.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_048_576, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 0.25, + outputCostPer1mTokens: 1.5, + }, + tags: ['low-cost', 'reasoning', 'general-purpose', 'vision'], + releaseDate: '2026-01-15', + lifecycle: 'preview', + }, { id: 'gemini-3-pro', internalModelId: 'gemini-3-pro-preview', @@ -31,9 +89,11 @@ export const GOOGLE_AI_CONFIG: ProviderConfig = { inputCostPer1mTokens: 2, outputCostPer1mTokens: 12, }, - tags: ['reasoning', 'agents', 'general-purpose', 'vision'], + tags: ['deprecated', 'reasoning', 'agents', 'general-purpose', 'vision'], releaseDate: '2025-11-18', - lifecycle: 'preview', + lifecycle: 'discontinued', + discontinuedDate: '2026-03-09', + replacementModels: ['gemini-3.1-pro'], }, { id: 'gemini-3-flash', @@ -119,6 +179,34 @@ export const GOOGLE_AI_CONFIG: ProviderConfig = { releaseDate: '2025-01-15', lifecycle: 'production', }, + { + id: 'gemini-2.5-flash-lite', + displayName: 'Gemini 2.5 Flash Lite', + description: + "Google's most cost-effective stable Gemini model, offering fast inference and solid reasoning capabilities for high-volume applications.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_048_576, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 0.1, + outputCostPer1mTokens: 0.4, + }, + tags: ['low-cost', 'reasoning', 'general-purpose', 'vision'], + releaseDate: '2025-06-01', + lifecycle: 'production', + }, { id: 'gemini-2.0-flash', displayName: 'Gemini 2.0 Flash', @@ -144,9 +232,11 @@ export const GOOGLE_AI_CONFIG: ProviderConfig = { inputCostPer1mTokens: 0.1, outputCostPer1mTokens: 0.4, }, - tags: ['low-cost', 'general-purpose', 'vision'], + tags: ['deprecated', 'low-cost', 'general-purpose', 'vision'], releaseDate: '2024-12-11', - lifecycle: 'production', + lifecycle: 'deprecated', + deprecationDate: '2026-01-01', + replacementModels: ['gemini-2.5-flash'], }, ], } diff --git a/packages/cognitive/src/features/providers/groq/groq.config.ts b/packages/cognitive/src/features/providers/groq/groq.config.ts index 0048857..d2f8dbe 100644 --- a/packages/cognitive/src/features/providers/groq/groq.config.ts +++ b/packages/cognitive/src/features/providers/groq/groq.config.ts @@ -6,6 +6,93 @@ export const GROQ_CONFIG: ProviderConfig = { description: 'Groq provides fast inference for OpenAI GPT OSS models', defaultModel: 'llama-3.3-70b-versatile', models: [ + { + id: 'llama-4-scout-17b-16e-instruct', + displayName: 'Llama 4 Scout 17B (Preview)', + internalModelId: 'meta-llama/llama-4-scout-17b-16e-instruct', + description: + 'Llama 4 Scout 17B is a mixture-of-experts model from Meta with 16 experts per forward pass. Supports multimodal input (text and image) and multilingual output across 12 languages.', + capabilities: { + supportsText: true, + supportsImages: true, + supportsSystemMessages: true, + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_000, + maxOutputTokens: 32_768, + }, + cost: { + inputCostPer1mTokens: 0.11, + outputCostPer1mTokens: 0.34, + }, + tags: ['preview', 'general-purpose', 'vision'], + releaseDate: '2025-04-14', + lifecycle: 'production', + }, + { + id: 'kimi-k2-instruct-0905', + displayName: 'Kimi K2 Instruct 0905 (Preview)', + internalModelId: 'moonshotai/kimi-k2-instruct-0905', + description: + 'Kimi K2 Instruct 0905 is an updated version of the Kimi K2 model from Moonshot AI, optimized for agentic and reasoning tasks with strong coding capabilities.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsSystemMessages: true, + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_000, + maxOutputTokens: 32_768, + }, + cost: { + inputCostPer1mTokens: 1, + outputCostPer1mTokens: 3, + }, + tags: ['preview', 'general-purpose', 'reasoning', 'coding'], + releaseDate: '2025-09-05', + lifecycle: 'production', + }, + { + id: 'qwen3-32b', + displayName: 'Qwen3 32B (Preview)', + internalModelId: 'qwen/qwen3-32b', + description: + 'Qwen3 32B is a world-class reasoning model from Alibaba with strong performance on code generation, tool calling, and advanced reasoning tasks.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsSystemMessages: true, + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_000, + maxOutputTokens: 32_768, + }, + cost: { + inputCostPer1mTokens: 0.29, + outputCostPer1mTokens: 0.59, + }, + tags: ['preview', 'general-purpose', 'reasoning', 'coding'], + releaseDate: '2025-05-01', + lifecycle: 'production', + }, { id: 'gpt-oss-20b', displayName: 'GPT-OSS 20B (Preview)', @@ -25,11 +112,11 @@ export const GROQ_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 32_000, + maxOutputTokens: 65_536, }, cost: { - inputCostPer1mTokens: 0.1, - outputCostPer1mTokens: 0.5, + inputCostPer1mTokens: 0.075, + outputCostPer1mTokens: 0.3, }, tags: ['preview', 'general-purpose', 'reasoning', 'low-cost'], releaseDate: '2024-12-01', @@ -54,11 +141,11 @@ export const GROQ_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 32_000, + maxOutputTokens: 65_536, }, cost: { inputCostPer1mTokens: 0.15, - outputCostPer1mTokens: 0.75, + outputCostPer1mTokens: 0.6, }, tags: ['preview', 'general-purpose', 'reasoning'], releaseDate: '2024-12-01', @@ -111,7 +198,7 @@ export const GROQ_CONFIG: ProviderConfig = { supportsTemperature: true, }, limits: { - maxInputTokens: 128_000, + maxInputTokens: 131_000, maxOutputTokens: 32_768, }, cost: { @@ -263,8 +350,8 @@ export const GROQ_CONFIG: ProviderConfig = { supportsTemperature: true, }, limits: { - maxInputTokens: 128_000, - maxOutputTokens: 8192, + maxInputTokens: 131_000, + maxOutputTokens: 131_072, }, cost: { inputCostPer1mTokens: 0.05, diff --git a/packages/cognitive/src/features/providers/openrouter/openrouter.config.ts b/packages/cognitive/src/features/providers/openrouter/openrouter.config.ts index 1bb7e9a..6452821 100644 --- a/packages/cognitive/src/features/providers/openrouter/openrouter.config.ts +++ b/packages/cognitive/src/features/providers/openrouter/openrouter.config.ts @@ -25,14 +25,42 @@ export const OPENROUTER_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 32_000, + maxOutputTokens: 65_536, }, cost: { inputCostPer1mTokens: 0.15, - outputCostPer1mTokens: 0.75, + outputCostPer1mTokens: 0.6, }, tags: ['preview', 'general-purpose', 'reasoning'], lifecycle: 'production', }, + { + id: 'gpt-oss-20b', + displayName: 'GPT-OSS 20B (Preview)', + provider: 'openrouter', + description: + 'gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: false, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_000, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 0.075, + outputCostPer1mTokens: 0.3, + }, + tags: ['preview', 'general-purpose', 'reasoning', 'low-cost'], + lifecycle: 'production', + }, ], } diff --git a/packages/cognitive/src/features/providers/xai/xai.config.ts b/packages/cognitive/src/features/providers/xai/xai.config.ts index 20c3161..0103200 100644 --- a/packages/cognitive/src/features/providers/xai/xai.config.ts +++ b/packages/cognitive/src/features/providers/xai/xai.config.ts @@ -4,8 +4,138 @@ export const XAI_CONFIG: ProviderConfig = { id: 'xai', name: 'xAI', description: 'xAI Grok models', - defaultModel: 'grok-4-fast-non-reasoning', + defaultModel: 'grok-4.20-0309-non-reasoning', models: [ + { + id: 'grok-4.20-0309-reasoning', + displayName: 'Grok 4.20 (Reasoning)', + description: "xAI's flagship Grok 4.20 model with advanced reasoning capabilities and 2M token context window.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: false, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 2, + outputCostPer1mTokens: 6, + }, + tags: ['recommended', 'reasoning', 'general-purpose', 'vision'], + lifecycle: 'production', + }, + { + id: 'grok-4.20-0309-non-reasoning', + displayName: 'Grok 4.20 (Non-Reasoning)', + description: "xAI's flagship Grok 4.20 model optimized for fast general-purpose tasks with 2M token context window.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 2, + outputCostPer1mTokens: 6, + }, + tags: ['recommended', 'general-purpose', 'vision'], + lifecycle: 'production', + }, + { + id: 'grok-4.20-multi-agent-0309', + displayName: 'Grok 4.20 Multi-Agent', + description: "xAI's Grok 4.20 model optimized for multi-agent workflows with 2M token context window.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: false, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 2, + outputCostPer1mTokens: 6, + }, + tags: ['agents', 'reasoning', 'general-purpose', 'vision'], + lifecycle: 'production', + }, + { + id: 'grok-4-1-fast-reasoning', + displayName: 'Grok 4.1 Fast (Reasoning)', + description: 'Fast Grok 4.1 model with reasoning capabilities and 2M token context window.', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: false, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 0.2, + outputCostPer1mTokens: 0.5, + }, + tags: ['reasoning', 'general-purpose', 'low-cost'], + lifecycle: 'production', + }, + { + id: 'grok-4-1-fast-non-reasoning', + displayName: 'Grok 4.1 Fast (Non-Reasoning)', + description: 'Fast, cost-effective Grok 4.1 model for non-reasoning tasks with 2M token context window.', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 0.2, + outputCostPer1mTokens: 0.5, + }, + tags: ['low-cost', 'general-purpose'], + lifecycle: 'production', + }, { id: 'grok-code-fast-1', displayName: 'Grok Code Fast 1',