CortexLM
diff --git a/‎Cargo.lock‎
Lines changed: 5 additions & 0 deletions b/‎Cargo.lock‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎cortex-agents/src/custom/loader.rs‎
Lines changed: 57 additions & 1 deletion b/‎cortex-agents/src/custom/loader.rs‎
Lines changed: 57 additions & 1 deletion
diff --git a/‎cortex-app-server/src/config.rs‎
Lines changed: 17 additions & 1 deletion b/‎cortex-app-server/src/config.rs‎
Lines changed: 17 additions & 1 deletion
diff --git a/‎cortex-app-server/src/error.rs‎
Lines changed: 16 additions & 1 deletion b/‎cortex-app-server/src/error.rs‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎cortex-app-server/src/main.rs‎
Lines changed: 21 additions & 3 deletions b/‎cortex-app-server/src/main.rs‎
Lines changed: 21 additions & 3 deletions
diff --git a/‎cortex-app-server/src/middleware.rs‎
Lines changed: 41 additions & 12 deletions b/‎cortex-app-server/src/middleware.rs‎
Lines changed: 41 additions & 12 deletions
diff --git a/‎cortex-cli/Cargo.toml‎
Lines changed: 11 additions & 1 deletion b/‎cortex-cli/Cargo.toml‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎cortex-cli/src/acp_cmd.rs‎
Lines changed: 21 additions & 0 deletions b/‎cortex-cli/src/acp_cmd.rs‎
Lines changed: 21 additions & 0 deletions
@@ -180,6 +180,9 @@ impl CustomAgentLoader {
 }
 
 /// Parse YAML frontmatter from content.
+///
+/// Supports YAML anchors (`&name`), aliases (`*name`), and merge keys (`<<: *name`)
+/// which are resolved before returning the parsed value (#2199).
 fn parse_frontmatter(content: &str) -> Result<(serde_yaml::Value, String), CustomAgentError> {
     let content = content.trim();
 
@@ -197,9 +200,62 @@ fn parse_frontmatter(content: &str) -> Result<(serde_yaml::Value, String), Custo
     let yaml_str = &content[3..end + 3];
     let body = content[end + 7..].trim();
 
+    // Parse YAML with anchor/alias support
     let frontmatter: serde_yaml::Value = serde_yaml::from_str(yaml_str)?;
 
-    Ok((frontmatter, body.to_string()))
+    // Resolve merge keys (`<<: *alias`) in the parsed YAML (#2199)
+    let resolved = resolve_yaml_merge_keys(frontmatter);
+
+    Ok((resolved, body.to_string()))
+}
+
+/// Recursively resolve YAML merge keys (`<<: *alias`) in a parsed YAML value (#2199).
+///
+/// Keys written directly in a mapping override merged keys. When `<<` holds a sequence of
+/// mappings, earlier entries override later ones, as the YAML merge-key spec requires.
+/// Each merge source is itself resolved first, so a `<<` nested inside a source is expanded
+/// as well. Merge sources that are not mappings are ignored.
+fn resolve_yaml_merge_keys(value: serde_yaml::Value) -> serde_yaml::Value {
+    match value {
+        serde_yaml::Value::Mapping(mut map) => {
+            let merge_key = serde_yaml::Value::String("<<".to_string());
+
+            if let Some(merge_value) = map.remove(&merge_key) {
+                // The merge value can be a single mapping or a sequence of mappings
+                let sources: Vec<serde_yaml::Value> = match merge_value {
+                    serde_yaml::Value::Sequence(seq) => seq,
+                    other => vec![other],
+                };
+                let mut result = serde_yaml::Mapping::new();
+
+                // Earlier sources win; expanding each source first resolves nested `<<` keys
+                for source in sources {
+                    if let serde_yaml::Value::Mapping(src) = resolve_yaml_merge_keys(source) {
+                        for (k, v) in src {
+                            result.entry(k).or_insert(v);
+                        }
+                    }
+                }
+
+                // Local values take precedence over anything merged in
+                for (k, v) in map {
+                    result.insert(k, resolve_yaml_merge_keys(v));
+                }
+                serde_yaml::Value::Mapping(result)
+            } else {
+                // No merge key, just recursively process children
+                let resolved: serde_yaml::Mapping = map
+                    .into_iter()
+                    .map(|(k, v)| (k, resolve_yaml_merge_keys(v)))
+                    .collect();
+                serde_yaml::Value::Mapping(resolved)
+            }
+        }
+        serde_yaml::Value::Sequence(seq) => {
+            serde_yaml::Value::Sequence(seq.into_iter().map(resolve_yaml_merge_keys).collect())
+        }
+        other => other,
+    }
 }
 
 /// Synchronous version of the loader.
 
@@ -48,10 +48,16 @@ pub struct ServerConfig {
     #[serde(default = "default_max_body_size")]
     pub max_body_size: usize,
 
-    /// Request timeout in seconds.
+    /// Request timeout in seconds (applies to full request lifecycle).
     #[serde(default = "default_request_timeout")]
     pub request_timeout: u64,
 
+    /// Read timeout for individual chunks in seconds.
+    /// Applies to chunked transfer encoding to prevent indefinite hangs
+    /// when clients disconnect without sending the terminal chunk.
+    #[serde(default = "default_read_timeout")]
+    pub read_timeout: u64,
+
     /// Enable metrics endpoint.
     #[serde(default = "default_true")]
     pub metrics_enabled: bool,
@@ -77,6 +83,10 @@ fn default_request_timeout() -> u64 {
     300 // 5 minutes
 }
 
+fn default_read_timeout() -> u64 {
+    30 // seconds; serde default for `ServerConfig::read_timeout` (per-chunk read timeout)
+}
+
 fn default_true() -> bool {
     true
 }
@@ -95,6 +105,7 @@ impl Default for ServerConfig {
             mdns: MdnsConfig::default(),
             max_body_size: default_max_body_size(),
             request_timeout: default_request_timeout(),
+            read_timeout: default_read_timeout(),
             metrics_enabled: true,
             health_enabled: true,
             cors_origins: vec![],
@@ -150,6 +161,11 @@ impl ServerConfig {
     pub fn request_timeout_duration(&self) -> Duration {
         Duration::from_secs(self.request_timeout)
     }
+
+    /// Returns `read_timeout` (whole seconds) as a `Duration`, for per-chunk reads.
+    pub fn read_timeout_duration(&self) -> Duration {
+        Duration::from_secs(self.read_timeout)
+    }
 }
 
 /// TLS configuration.
 
@@ -138,7 +138,11 @@ pub struct ErrorDetail {
 
 impl IntoResponse for AppError {
     fn into_response(self) -> Response {
+        use axum::http::header;
+
         let status = self.status_code();
+        let is_rate_limited = matches!(self, AppError::RateLimitExceeded);
+
         let body = ErrorResponse {
             error: ErrorDetail {
                 code: self.error_code().to_string(),
@@ -148,7 +152,18 @@ impl IntoResponse for AppError {
             },
         };
 
-        (status, Json(body)).into_response()
+        let mut response = (status, Json(body)).into_response();
+
+        // Add Retry-After header for rate limit responses (429)
+        // This helps clients implement proper backoff strategies
+        if is_rate_limited {
+            response.headers_mut().insert(
+                header::RETRY_AFTER,
+                "60".parse().unwrap(), // Suggest retry after 60 seconds
+            );
+        }
+
+        response
     }
 }
 
 
@@ -93,10 +93,13 @@ async fn main() -> ExitCode {
     };
 
     info!("Starting Cortex server on {}", config.listen_addr);
+    info!("Graceful shutdown timeout: {}s", config.shutdown_timeout);
     info!("Press Ctrl+C to stop");
 
+    let shutdown_timeout = config.shutdown_timeout;
+
     // Create shutdown signal
-    let shutdown = async {
+    let shutdown = async move {
         let ctrl_c = async {
             signal::ctrl_c()
                 .await
@@ -116,12 +119,27 @@ async fn main() -> ExitCode {
 
         tokio::select! {
             _ = ctrl_c => {
-                info!("Received Ctrl+C, shutting down...");
+                info!("Received Ctrl+C, initiating graceful shutdown (timeout: {}s)...", shutdown_timeout);
             }
             _ = terminate => {
-                info!("Received SIGTERM, shutting down...");
+                info!("Received SIGTERM, initiating graceful shutdown (timeout: {}s)...", shutdown_timeout);
             }
         }
+
+        // Give in-flight requests time to complete
+        // The axum graceful shutdown will handle waiting for connections
+        info!(
+            "Waiting up to {}s for in-flight requests to complete...",
+            shutdown_timeout
+        );
+
+        // Kill all background terminals on shutdown
+        info!("Killing background terminals...");
+        use cortex_engine::tools::handlers::get_terminal_manager;
+        let manager = get_terminal_manager();
+        let manager = manager.read().await;
+        manager.kill_all_terminals().await;
+        info!("Background terminals killed");
     };
 
     if let Err(e) = run_with_shutdown(config, shutdown).await {
 
@@ -150,14 +150,25 @@ fn get_rate_limit_key(request: &Request, state: &AppState) -> String {
     }
 
     // Fall back to IP address
-    if let Some(forwarded) = request.headers().get("X-Forwarded-For")
-        && let Ok(s) = forwarded.to_str()
-        && let Some(ip) = s.split(',').next()
-    {
-        return format!("ip:{}", ip.trim());
+    // When trust_proxy is enabled, check proxy headers for real client IP
+    if state.config.rate_limit.trust_proxy {
+        // Try X-Real-IP first (single IP from proxy)
+        if let Some(real_ip) = request.headers().get("X-Real-IP")
+            && let Ok(ip) = real_ip.to_str()
+        {
+            return format!("ip:{}", ip.trim());
+        }
+
+        // Then try X-Forwarded-For (may contain multiple IPs, take the first)
+        if let Some(forwarded) = request.headers().get("X-Forwarded-For")
+            && let Ok(s) = forwarded.to_str()
+            && let Some(ip) = s.split(',').next()
+        {
+            return format!("ip:{}", ip.trim());
+        }
     }
 
-    // Default to unknown
+    // Default to unknown when not behind proxy or headers not present
     "ip:unknown".to_string()
 }
 
@@ -254,17 +265,27 @@ pub async fn body_limit_middleware(
 }
 
 /// CORS configuration.
+/// An empty `origins` slice yields a permissive layer; otherwise only the listed origins
+/// are allowed (entries that are not valid header values are dropped), with any method/header.
 pub fn cors_layer(origins: &[String]) -> tower_http::cors::CorsLayer {
-    use tower_http::cors::CorsLayer;
+    use tower_http::cors::{Any, CorsLayer};
+
+    // Preflight cache lifetime (Access-Control-Max-Age): 24 hours (86400 seconds),
+    // applied to both branches to reduce repeated OPTIONS preflight requests.
+    let max_age = std::time::Duration::from_secs(86400);
 
     if origins.is_empty() {
-        CorsLayer::permissive()
+        CorsLayer::permissive().max_age(max_age)
     } else {
         let origins: Vec<HeaderValue> = origins
             .iter()
             .filter_map(|o| HeaderValue::from_str(o).ok())
             .collect();
-        CorsLayer::new().allow_origin(origins)
+        CorsLayer::new()
+            .allow_origin(origins)
+            .allow_methods(Any)
+            .allow_headers(Any)
+            .max_age(max_age)
     }
 }
 
@@ -377,12 +398,20 @@ impl RequestContext {
             .map(|r| r.0.clone())
             .unwrap_or_else(|| Uuid::new_v4().to_string());
 
+        // Try to get client IP from proxy headers (X-Real-IP first, then X-Forwarded-For)
         let client_ip = request
             .headers()
-            .get("X-Forwarded-For")
+            .get("X-Real-IP")
             .and_then(|v| v.to_str().ok())
-            .and_then(|s| s.split(',').next())
-            .map(|s| s.trim().to_string());
+            .map(|s| s.trim().to_string())
+            .or_else(|| {
+                request
+                    .headers()
+                    .get("X-Forwarded-For")
+                    .and_then(|v| v.to_str().ok())
+                    .and_then(|s| s.split(',').next())
+                    .map(|s| s.trim().to_string())
+            });
 
         let user_agent = request
             .headers()
 
@@ -29,6 +29,7 @@ cortex-protocol = { workspace = true }
 cortex-tui = { workspace = true, optional = true }
 cortex-exec = { workspace = true }
 cortex-common = { workspace = true, features = ["cli"] }
+cortex-commands = { workspace = true }
 cortex-login = { workspace = true }
 cortex-process-hardening = { workspace = true }
 cortex-app-server = { workspace = true }
@@ -52,6 +53,7 @@ serde_json = { workspace = true }
 chrono = { workspace = true }
 uuid = { workspace = true }
 toml = { workspace = true }
+toml_edit = { workspace = true }
 serde_yaml = "0.9"
 serde = { workspace = true }
 ctor = "0.5"
@@ -65,8 +67,9 @@ zip = { workspace = true }
 flate2 = "1.0"
 tar = "0.4"
 
-# For scrape command (HTML parsing)
+# For scrape command (HTML parsing and URL handling)
 scraper = "0.22"
+url = "2.5"
 
 # For agent reference extraction from messages
 regex = { workspace = true }
@@ -79,3 +82,10 @@ which = { workspace = true }
 
 # For Ctrl+C handling and terminal cleanup
 ctrlc = { version = "3.4", features = ["termination"] }
+
+# For file descriptor limit checking
+libc = { workspace = true }
+
+# For SIGTERM signal handling on Unix (graceful container shutdown)
+[target.'cfg(unix)'.dependencies]
+signal-hook = "0.3"
@@ -41,6 +41,16 @@ pub struct AcpCli {
     /// Agent to use.
     #[arg(long = "agent")]
     pub agent: Option<String>,
+
+    /// Tools to allow (whitelist). Can be specified multiple times.
+    /// Only these tools will be available to the agent.
+    #[arg(long = "allow-tool", action = clap::ArgAction::Append)]
+    pub allow_tools: Vec<String>,
+
+    /// Tools to deny (blacklist). Can be specified multiple times.
+    /// These tools will be blocked from use.
+    #[arg(long = "deny-tool", action = clap::ArgAction::Append)]
+    pub deny_tools: Vec<String>,
 }
 
 impl AcpCli {
@@ -58,6 +68,17 @@ impl AcpCli {
             config.model = resolve_model_alias(model).to_string();
         }
 
+        // Report tool restrictions. NOTE(review): only printed here; confirm they are enforced.
+        if !self.allow_tools.is_empty() {
+            eprintln!("Tool whitelist: {:?}", self.allow_tools);
+            // Note: Tool restrictions are passed via server configuration
+        }
+
+        if !self.deny_tools.is_empty() {
+            eprintln!("Tool blacklist: {:?}", self.deny_tools);
+            // Note: Tool restrictions are passed via server configuration
+        }
+
         // Decide transport mode
         if self.stdio || self.port == 0 {
             // Use stdio transport