Skip to content

Commit f681dfe

Browse files
committed
Merge PR #252: fix: batch fixes for issues #2363-#2379
2 parents 746ca40 + a5db724 commit f681dfe

9 files changed

Lines changed: 657 additions & 17 deletions

File tree

cortex-cli/src/debug_cmd.rs

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,9 @@ async fn run_config(args: ConfigArgs) -> Result<()> {
149149
];
150150
for var in cortex_vars {
151151
if let Ok(val) = std::env::var(var) {
152-
// Mask API keys
153-
let display_val = if var.contains("API_KEY") || var.contains("SECRET") {
154-
if val.len() > 8 {
155-
format!("{}...{}", &val[..4], &val[val.len() - 4..])
156-
} else {
157-
"***".to_string()
158-
}
152+
// Mask sensitive values (API keys, secrets, tokens, passwords, credentials)
153+
let display_val = if is_sensitive_var_name(var) {
154+
redact_sensitive_value(&val)
159155
} else {
160156
val
161157
};
@@ -2067,6 +2063,40 @@ impl DebugCli {
20672063
}
20682064
}
20692065

2066+
/// Substrings that mark an environment variable name as holding sensitive data.
///
/// Entries are upper-case because names are upper-cased before matching.
const SENSITIVE_PATTERNS: &[&str] = &[
    "API_KEY",
    "SECRET",
    "TOKEN",
    "PASSWORD",
    "CREDENTIAL",
    "PRIVATE",
    "AUTH",
    "ACCESS_KEY",
    "BEARER",
    "SESSION",
];

/// Returns `true` when `name`, compared case-insensitively, contains any of
/// the [`SENSITIVE_PATTERNS`] substrings.
fn is_sensitive_var_name(name: &str) -> bool {
    let normalized = name.to_uppercase();
    for needle in SENSITIVE_PATTERNS {
        if normalized.contains(*needle) {
            return true;
        }
    }
    false
}
2087+
2088+
/// Redact a sensitive value, showing only its first and last four characters.
///
/// * An empty value is rendered as `[EMPTY]`.
/// * A value of eight characters or fewer is rendered as `[REDACTED]`, because
///   showing four characters from each end would reveal all of it.
/// * Anything longer is rendered as `<first 4>...<last 4>`.
///
/// Lengths and cut points are measured in characters rather than bytes, so
/// values containing multi-byte UTF-8 characters never cause a slicing panic.
fn redact_sensitive_value(value: &str) -> String {
    /// Number of characters left visible at each end of the value.
    const VISIBLE: usize = 4;

    if value.is_empty() {
        return "[EMPTY]".to_string();
    }
    if value.chars().count() <= 2 * VISIBLE {
        return "[REDACTED]".to_string();
    }

    // Byte offset just past the first VISIBLE characters.
    let head_end = value
        .char_indices()
        .nth(VISIBLE)
        .map_or(value.len(), |(idx, _)| idx);
    // Byte offset where the last VISIBLE characters begin.
    let tail_start = value
        .char_indices()
        .rev()
        .nth(VISIBLE - 1)
        .map_or(0, |(idx, _)| idx);

    format!("{}...{}", &value[..head_end], &value[tail_start..])
}
2099+
20702100
#[cfg(test)]
20712101
mod tests {
20722102
use super::*;
@@ -2086,4 +2116,38 @@ mod tests {
20862116
assert_eq!(format_size(1048576), "1.00 MB");
20872117
assert_eq!(format_size(1073741824), "1.00 GB");
20882118
}
2119+
2120+
#[test]
fn test_is_sensitive_var_name() {
    // Should match sensitive patterns
    assert!(is_sensitive_var_name("OPENAI_API_KEY"));
    assert!(is_sensitive_var_name("DATABASE_PASSWORD"));
    assert!(is_sensitive_var_name("AWS_SECRET_ACCESS_KEY"));
    assert!(is_sensitive_var_name("AUTH_TOKEN"));
    assert!(is_sensitive_var_name("GITHUB_TOKEN"));
    assert!(is_sensitive_var_name("PRIVATE_KEY"));
    assert!(is_sensitive_var_name("CREDENTIAL_FILE"));
    assert!(is_sensitive_var_name("BEARER_TOKEN"));

    // Patterns not covered by the names above
    assert!(is_sensitive_var_name("AWS_ACCESS_KEY_ID"));
    assert!(is_sensitive_var_name("SESSION_ID"));
    assert!(is_sensitive_var_name("CORTEX_AUTH"));

    // Matching is case-insensitive
    assert!(is_sensitive_var_name("openai_api_key"));
    assert!(is_sensitive_var_name("Github_Token"));

    // Should not match non-sensitive patterns
    assert!(!is_sensitive_var_name("PATH"));
    assert!(!is_sensitive_var_name("HOME"));
    assert!(!is_sensitive_var_name("USER"));
    assert!(!is_sensitive_var_name("EDITOR"));
    assert!(!is_sensitive_var_name("SHELL"));
    assert!(!is_sensitive_var_name(""));
}
2139+
2140+
#[test]
fn test_redact_sensitive_value() {
    // Empty value
    assert_eq!(redact_sensitive_value(""), "[EMPTY]");

    // Short value (8 or fewer chars) is hidden completely
    assert_eq!(redact_sensitive_value("short"), "[REDACTED]");
    assert_eq!(redact_sensitive_value("12345678"), "[REDACTED]");

    // Longer value shows first/last 4 chars.
    // "sk-abc123xyz789" ends in "...xyz789", so its last four characters are "z789".
    assert_eq!(redact_sensitive_value("sk-abc123xyz789"), "sk-a...z789");
    assert_eq!(redact_sensitive_value("supersecretpassword"), "supe...word");
}
20892153
}

cortex-cli/src/run_cmd.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,11 @@ pub struct RunCli {
152152
/// Timeout in seconds (0 for no timeout).
153153
#[arg(long = "timeout", default_value_t = 0)]
154154
pub timeout: u64,
155+
156+
/// Preview what would be sent without executing.
157+
/// Shows estimated token counts including system prompt and tool definitions.
158+
#[arg(long = "dry-run")]
159+
pub dry_run: bool,
155160
}
156161

157162
/// Tool display information for formatted output.
@@ -467,6 +472,11 @@ impl RunCli {
467472
attachments: &[FileAttachment],
468473
session_mode: SessionMode,
469474
) -> Result<()> {
475+
// Handle dry-run mode - show token estimates without executing
476+
if self.dry_run {
477+
return self.run_dry_run(message, attachments).await;
478+
}
479+
470480
let is_json = matches!(self.format, OutputFormat::Json | OutputFormat::Jsonl);
471481
let is_terminal = io::stdout().is_terminal();
472482

@@ -828,6 +838,109 @@ impl RunCli {
828838

829839
Ok(())
830840
}
841+
842+
/// Run in dry-run mode - show token estimates without executing.
843+
async fn run_dry_run(&self, message: &str, attachments: &[FileAttachment]) -> Result<()> {
844+
use cortex_engine::tokenizer::{TokenCounter, TokenizerType};
845+
846+
let config = cortex_engine::Config::default();
847+
let model = self
848+
.model
849+
.as_ref()
850+
.map(|m| resolve_model_alias(m).to_string())
851+
.unwrap_or_else(|| config.model.clone());
852+
853+
let mut counter = TokenCounter::for_model(&model);
854+
855+
// Count user prompt tokens
856+
let user_prompt_tokens = counter.count(message);
857+
858+
// Count attachment tokens
859+
let mut attachment_tokens = 0u32;
860+
for attachment in attachments {
861+
let content =
862+
std::fs::read_to_string(&attachment.path).unwrap_or_else(|_| String::new());
863+
attachment_tokens += counter.count(&content);
864+
// Add overhead for file markers
865+
attachment_tokens += 20; // Approximate overhead for "--- File: ... ---" markers
866+
}
867+
868+
// Estimate system prompt tokens (typical system prompt is ~500-2000 tokens)
869+
// This is an approximation as the actual system prompt varies
870+
let system_prompt_tokens = 1500u32;
871+
872+
// Estimate tool definition tokens
873+
// Each tool definition is approximately 100-200 tokens on average
874+
// Common tools: Execute, Read, Write, Edit, LS, Grep, Glob, etc.
875+
let tool_count = 15; // Approximate number of default tools
876+
let tool_tokens = tool_count * 150; // ~150 tokens per tool definition
877+
878+
// Calculate totals
879+
let total_input_tokens =
880+
user_prompt_tokens + attachment_tokens + system_prompt_tokens + tool_tokens;
881+
882+
// Output based on format
883+
if matches!(self.format, OutputFormat::Json | OutputFormat::Jsonl) {
884+
let output = serde_json::json!({
885+
"dry_run": true,
886+
"model": model,
887+
"token_estimates": {
888+
"user_prompt": user_prompt_tokens,
889+
"attachments": attachment_tokens,
890+
"system_prompt": system_prompt_tokens,
891+
"tool_definitions": tool_tokens,
892+
"total_input": total_input_tokens,
893+
},
894+
"message_preview": if message.len() > 100 {
895+
format!("{}...", &message[..100])
896+
} else {
897+
message.to_string()
898+
},
899+
"attachment_count": attachments.len(),
900+
});
901+
println!("{}", serde_json::to_string_pretty(&output)?);
902+
} else {
903+
println!("Dry Run - Token Estimate");
904+
println!("{}", "=".repeat(50));
905+
println!();
906+
println!("Model: {}", model);
907+
println!();
908+
println!("Token Breakdown:");
909+
println!(" User prompt: {:>8} tokens", user_prompt_tokens);
910+
if !attachments.is_empty() {
911+
println!(
912+
" Attachments: {:>8} tokens ({} files)",
913+
attachment_tokens,
914+
attachments.len()
915+
);
916+
}
917+
println!(
918+
" System prompt: {:>8} tokens (estimated)",
919+
system_prompt_tokens
920+
);
921+
println!(
922+
" Tool definitions: {:>8} tokens (estimated, {} tools)",
923+
tool_tokens, tool_count
924+
);
925+
println!(" {}", "-".repeat(30));
926+
println!(" Total input: {:>8} tokens", total_input_tokens);
927+
println!();
928+
println!("Note: System prompt and tool definition token counts are estimates.");
929+
println!("Actual counts may vary based on agent configuration.");
930+
if !message.is_empty() {
931+
println!();
932+
println!("Message preview:");
933+
let preview = if message.len() > 200 {
934+
format!(" {}...", &message[..200])
935+
} else {
936+
format!(" {}", message)
937+
};
938+
println!("{}", preview);
939+
}
940+
}
941+
942+
Ok(())
943+
}
831944
}
832945

833946
/// Session handling mode.

cortex-cli/src/scrape_cmd.rs

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ pub struct ScrapeCommand {
124124
#[arg(long, default_value = "highest", value_name = "RESOLUTION")]
125125
pub image_resolution: String,
126126

127+
/// Attempt to traverse shadow DOM elements.
128+
/// When enabled, will look for <template> tags with shadowrootmode attribute
129+
/// and include their content. This is a best-effort feature as shadow DOM
130+
/// content is typically only accessible via JavaScript execution.
131+
#[arg(long)]
132+
pub include_shadow_dom: bool,
133+
127134
/// Show verbose output (includes fetching info).
128135
#[arg(short, long)]
129136
pub verbose: bool,
@@ -224,7 +231,14 @@ impl ScrapeCommand {
224231

225232
/// Process HTML content based on options.
226233
fn process_html(&self, html: &str, format: OutputFormat) -> Result<String> {
227-
let document = Html::parse_document(html);
234+
// Preprocess HTML to extract shadow DOM content if requested
235+
let processed_html = if self.include_shadow_dom {
236+
extract_shadow_dom_content(html)
237+
} else {
238+
html.to_string()
239+
};
240+
241+
let document = Html::parse_document(&processed_html);
228242

229243
// If a selector is provided, extract only that content
230244
let content_html = if let Some(selector_str) = &self.selector {
@@ -269,6 +283,36 @@ impl ScrapeCommand {
269283
}
270284
}
271285

286+
/// Extract shadow DOM content from HTML.
287+
/// This is a best-effort approach that handles declarative shadow DOM (template tags
288+
/// with shadowrootmode attribute) and replaces custom elements with their shadow content.
289+
fn extract_shadow_dom_content(html: &str) -> String {
290+
let document = Html::parse_document(html);
291+
let mut result = html.to_string();
292+
293+
// Look for declarative shadow DOM templates
294+
// These are <template shadowrootmode="open"> or <template shadowroot="open"> tags
295+
if let Ok(template_selector) = Selector::parse("template") {
296+
for template in document.select(&template_selector) {
297+
// Check for shadow root attributes
298+
let has_shadow_attr = template.value().attr("shadowrootmode").is_some()
299+
|| template.value().attr("shadowroot").is_some();
300+
301+
if has_shadow_attr {
302+
// Get the inner HTML of the template
303+
let inner_html = template.inner_html();
304+
305+
// Replace the template with its content
306+
// This makes shadow DOM content visible to the scraper
307+
let template_html = template.html();
308+
result = result.replace(&template_html, &inner_html);
309+
}
310+
}
311+
}
312+
313+
result
314+
}
315+
272316
/// Parse custom headers from command line arguments.
273317
fn parse_headers(headers: &[String]) -> Result<HashMap<String, String>> {
274318
let mut result = HashMap::new();

cortex-cli/src/stats_cmd.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,44 @@ pub struct DateRange {
104104
}
105105

106106
/// Pricing information per 1M tokens.
#[derive(Debug, Clone)]
struct ModelPricing {
    /// Price of one million input tokens.
    input_per_million: f64,
    /// Price of one million output tokens.
    output_per_million: f64,
}

/// Load custom pricing overrides from the environment.
///
/// This allows users to override default pricing when provider prices change.
/// Each variable has the form
///
/// ```text
/// CORTEX_PRICING_<MODEL>=<input_price>,<output_price>
/// ```
///
/// The `<MODEL>` suffix is lower-cased and its underscores become hyphens to
/// form the map key, so `CORTEX_PRICING_GPT_4O=2.5,10.0` is stored under
/// `gpt-4o`. (A suffix without the underscore, such as `GPT4O`, would be
/// stored as `gpt4o`.)
///
/// Entries are silently skipped when:
/// * the variable name or value is not valid Unicode,
/// * the model suffix is empty (an empty key would be a substring of every
///   model name),
/// * the value is not exactly two comma-separated numbers, or
/// * either price is negative, NaN or infinite.
fn load_custom_pricing() -> std::collections::HashMap<String, ModelPricing> {
    /// Parse one price component, accepting only finite, non-negative numbers.
    fn parse_price(raw: &str) -> Option<f64> {
        raw.trim()
            .parse::<f64>()
            .ok()
            .filter(|price| price.is_finite() && *price >= 0.0)
    }

    // `vars_os` is used instead of `vars`, which panics on non-Unicode entries.
    std::env::vars_os()
        .filter_map(|(key, value)| {
            let key = key.into_string().ok()?;
            let model_suffix = key.strip_prefix("CORTEX_PRICING_")?;
            if model_suffix.is_empty() {
                return None;
            }

            let value = value.into_string().ok()?;
            // A third field ends up in the second half and fails to parse.
            let (input, output) = value.split_once(',')?;
            let pricing = ModelPricing {
                input_per_million: parse_price(input)?,
                output_per_million: parse_price(output)?,
            };

            Some((model_suffix.to_lowercase().replace('_', "-"), pricing))
        })
        .collect()
}
144+
112145
impl StatsCli {
113146
/// Run the stats command.
114147
pub async fn run(self) -> Result<()> {
@@ -165,8 +198,26 @@ fn get_cortex_home() -> PathBuf {
165198
}
166199

167200
/// Get pricing for a model.
201+
/// Checks custom pricing from environment first, then falls back to defaults.
168202
fn get_model_pricing(model: &str) -> ModelPricing {
169-
// Pricing per 1M tokens (approximate as of late 2024)
203+
// First check for custom pricing from environment
204+
let custom_pricing = load_custom_pricing();
205+
let model_lower = model.to_lowercase();
206+
207+
// Check for exact match in custom pricing
208+
if let Some(pricing) = custom_pricing.get(&model_lower) {
209+
return pricing.clone();
210+
}
211+
212+
// Check for partial match in custom pricing (e.g., "gpt-4o" matches "gpt-4o-mini")
213+
for (key, pricing) in &custom_pricing {
214+
if model_lower.contains(key) {
215+
return pricing.clone();
216+
}
217+
}
218+
219+
// Fall back to default pricing (may be outdated - users can override via CORTEX_PRICING_*)
220+
// Pricing per 1M tokens (as of late 2024/early 2025 - may change)
170221
match model {
171222
// Anthropic
172223
m if m.contains("claude-opus-4") || m.contains("opus-4") => ModelPricing {

cortex-engine/src/client/types.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ pub struct CompletionRequest {
1515
/// Temperature for sampling.
1616
#[serde(skip_serializing_if = "Option::is_none")]
1717
pub temperature: Option<f32>,
18+
/// Random seed for reproducibility.
19+
/// When set, the same seed with identical inputs should produce deterministic outputs.
20+
/// Note: This is applied to all model calls including tool invocations.
21+
#[serde(skip_serializing_if = "Option::is_none")]
22+
pub seed: Option<u64>,
1823
/// Tools available for the model.
1924
#[serde(skip_serializing_if = "Vec::is_empty")]
2025
pub tools: Vec<ToolDefinition>,
@@ -30,6 +35,7 @@ impl Default for CompletionRequest {
3035
model: String::new(),
3136
max_tokens: None,
3237
temperature: None,
38+
seed: None,
3339
tools: vec![],
3440
stream: true,
3541
}

0 commit comments

Comments
 (0)