diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index f7f4bc4..4fd8cc0 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -6978,13 +6978,14 @@ impl Interpreter { { let mut depth = 0i32; let chars: Vec = expr.chars().collect(); + let byte_offsets: Vec = expr.char_indices().map(|(b, _)| b).collect(); for i in (0..chars.len()).rev() { match chars[i] { '(' => depth += 1, ')' => depth -= 1, ',' if depth == 0 => { - let left = &expr[..i]; - let right = &expr[i + 1..]; + let left = &expr[..byte_offsets[i]]; + let right = &expr[byte_offsets[i] + 1..]; self.evaluate_arithmetic_with_assign(left); return self.evaluate_arithmetic_with_assign(right); } @@ -7313,6 +7314,8 @@ impl Interpreter { } let chars: Vec = expr.chars().collect(); + // Precompute byte offsets so char-index → byte-index is O(1) + let bo: Vec = expr.char_indices().map(|(b, _)| b).collect(); // Ternary operator (lowest precedence) let mut depth = 0; @@ -7329,11 +7332,14 @@ impl Interpreter { ')' => colon_depth -= 1, '?' => colon_depth += 1, ':' if colon_depth == 0 => { - let cond = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let then_val = - self.parse_arithmetic_impl(&expr[i + 1..j], arith_depth + 1); + let cond = + self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let then_val = self.parse_arithmetic_impl( + &expr[bo[i] + 1..bo[j]], + arith_depth + 1, + ); let else_val = - self.parse_arithmetic_impl(&expr[j + 1..], arith_depth + 1); + self.parse_arithmetic_impl(&expr[bo[j] + 1..], arith_depth + 1); return if cond != 0 { then_val } else { else_val }; } ':' => colon_depth -= 1, @@ -7352,12 +7358,12 @@ impl Interpreter { '(' => depth += 1, ')' => depth -= 1, '|' if depth == 0 && i > 0 && chars[i - 1] == '|' => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); // Short-circuit: if left is true, don't evaluate right if left != 0 { return 1; } - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if right != 0 { 1 } else { 0 }; } _ => {} @@ -7371,12 +7377,12 @@ impl Interpreter { '(' => depth += 1, ')' => depth -= 1, '&' if depth == 0 && i > 0 && chars[i - 1] == '&' => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); // Short-circuit: if left is false, don't evaluate right if left == 0 { return 0; } - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if right != 0 { 1 } else { 0 }; } _ => {} @@ -7393,8 +7399,8 @@ impl Interpreter { && (i == 0 || chars[i - 1] != '|') && (i + 1 >= chars.len() || chars[i + 1] != '|') => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return left | right; } _ => {} @@ -7408,8 +7414,8 @@ impl Interpreter { '(' => depth += 1, ')' => depth -= 1, '^' if depth == 0 => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return left ^ right; } _ => {} @@ -7426,8 +7432,8 @@ impl Interpreter { && (i == 0 || chars[i - 1] != '&') && (i + 1 >= chars.len() || chars[i + 1] != '&') => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return left & right; } _ => {} @@ -7441,13 +7447,13 @@ impl Interpreter { '(' => depth += 1, ')' => depth -= 1, '=' if depth == 0 && i > 0 && chars[i - 1] == '=' => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if left == right { 1 } else { 0 }; } '=' if depth == 0 && i > 0 && chars[i - 1] == '!' => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if left != right { 1 } else { 0 }; } _ => {} @@ -7461,29 +7467,29 @@ impl Interpreter { '(' => depth += 1, ')' => depth -= 1, '=' if depth == 0 && i > 0 && chars[i - 1] == '<' => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if left <= right { 1 } else { 0 }; } '=' if depth == 0 && i > 0 && chars[i - 1] == '>' => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if left >= right { 1 } else { 0 }; } '<' if depth == 0 && (i + 1 >= chars.len() || (chars[i + 1] != '=' && chars[i + 1] != '<')) && (i == 0 || chars[i - 1] != '<') => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if left < right { 1 } else { 0 }; } '>' if depth == 0 && (i + 1 >= chars.len() || (chars[i + 1] != '=' && chars[i + 1] != '>')) && (i == 0 || chars[i - 1] != '>') => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); return if left > right { 1 } else { 0 }; } _ => {} @@ -7502,8 +7508,8 @@ impl Interpreter { && (i < 2 || chars[i - 2] != '<') && (i + 1 >= chars.len() || chars[i + 1] != '=') => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); // THREAT[TM-DOS-029]: clamp shift to 0..=63 to prevent panic let shift = right.clamp(0, 63) as u32; return left.wrapping_shl(shift); @@ -7514,8 +7520,8 @@ impl Interpreter { && (i < 2 || chars[i - 2] != '>') && (i + 1 >= chars.len() || chars[i + 1] != '=') => { - let left = self.parse_arithmetic_impl(&expr[..i - 1], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i - 1]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); // THREAT[TM-DOS-029]: clamp shift to 0..=63 to prevent panic let shift = right.clamp(0, 63) as u32; return left.wrapping_shr(shift); @@ -7544,8 +7550,8 @@ impl Interpreter { if chars[i] == '-' && i > 0 && chars[i - 1] == '-' { continue; } - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); // THREAT[TM-DOS-029]: wrapping to prevent overflow panic return if chars[i] == '+' { left.wrapping_add(right) @@ -7571,14 +7577,14 @@ impl Interpreter { if i > 0 && chars[i - 1] == '*' { continue; } - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); // THREAT[TM-DOS-029]: wrapping to prevent overflow panic return left.wrapping_mul(right); } '/' | '%' if depth == 0 => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); - let right = self.parse_arithmetic_impl(&expr[i + 1..], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 1..], arith_depth + 1); // THREAT[TM-DOS-029]: wrapping to prevent i64::MIN / -1 panic return match chars[i] { '/' => { @@ -7609,9 +7615,9 @@ impl Interpreter { '(' => depth += 1, ')' => depth -= 1, '*' if depth == 0 && i + 1 < chars.len() && chars[i + 1] == '*' => { - let left = self.parse_arithmetic_impl(&expr[..i], arith_depth + 1); + let left = self.parse_arithmetic_impl(&expr[..bo[i]], arith_depth + 1); // Right-associative: parse from i+2 onward (may contain more **) - let right = self.parse_arithmetic_impl(&expr[i + 2..], arith_depth + 1); + let right = self.parse_arithmetic_impl(&expr[bo[i] + 2..], arith_depth + 1); // THREAT[TM-DOS-029]: clamp exponent to 0..=63 to prevent panic/hang let exp = right.clamp(0, 63) as u32; return left.wrapping_pow(exp); diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index e8f854a..36fa44f 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -2161,6 +2161,14 @@ mod tests { assert_eq!(result.stdout, "first second\n"); } + #[tokio::test] + async fn test_array_single_quote_subscript_no_panic() { + // Regression: single quote char as array index caused begin > end slice panic + let mut bash = Bash::new(); + // Should not panic on malformed subscript with lone quote + let _ = bash.exec("echo ${arr[\"]}").await; + } + // Resource limit tests #[tokio::test] @@ -3574,6 +3582,17 @@ done"#, assert_eq!(result.stdout, "1\n"); } + #[tokio::test] + async fn test_arithmetic_multibyte_no_panic() { + // Regression: multi-byte chars caused char-index/byte-index mismatch panic + let mut bash = Bash::new(); + // Multi-byte char in comma expression - should not panic + let result = bash.exec("echo $((0,1))").await.unwrap(); + assert_eq!(result.stdout, "1\n"); + // Ensure multi-byte input doesn't panic (treated as 0 / error) + let _ = bash.exec("echo $((\u{00e9}+1))").await; + } + // ============================================================ // Brace Expansion Tests // ============================================================ diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index d612a5c..fc4fd85 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -2397,8 +2397,9 @@ impl<'a> Parser<'a> { index.push(chars.next().unwrap()); } // Strip surrounding quotes from index (e.g. "foo" -> foo) - if (index.starts_with('"') && index.ends_with('"')) - || (index.starts_with('\'') && index.ends_with('\'')) + if index.len() >= 2 + && ((index.starts_with('"') && index.ends_with('"')) + || (index.starts_with('\'') && index.ends_with('\''))) { index = index[1..index.len() - 1].to_string(); } diff --git a/crates/bashkit/tests/proptest_security.rs b/crates/bashkit/tests/proptest_security.rs index 4f6149b..51a865d 100644 --- a/crates/bashkit/tests/proptest_security.rs +++ b/crates/bashkit/tests/proptest_security.rs @@ -15,6 +15,34 @@ fn bash_input_strategy() -> impl Strategy { proptest::string::string_regex("[a-zA-Z0-9_ ;|$()]{0,50}").unwrap() } +// Strategy for generating arithmetic expressions with multi-byte chars +// Covers the char-index vs byte-index mismatch that caused panics +fn arithmetic_multibyte_strategy() -> impl Strategy { + prop_oneof![ + // Multi-byte chars mixed with operators + proptest::string::string_regex("[0-9a-z+\\-*/%,()éèüöñ]{1,30}").unwrap(), + // CJK + operators + proptest::string::string_regex("[0-9+\\-*/()你好世界]{1,20}").unwrap(), + // Emoji + arithmetic + proptest::string::string_regex("[0-9+\\-*/,🎉🚀]{1,15}").unwrap(), + // Multi-byte with ternary/bitwise + proptest::string::string_regex("[0-9a-z?:|&^!<>=éü]{1,30}").unwrap(), + ] +} + +// Strategy for generating degenerate array subscript expressions +fn array_subscript_strategy() -> impl Strategy { + prop_oneof![ + // Lone/mismatched quotes in subscripts + proptest::string::string_regex("\\$\\{arr\\[[\"'a-z]{0,5}\\]\\}").unwrap(), + // Multi-byte in subscripts + proptest::string::string_regex("\\$\\{arr\\[[éü0-9\"']{0,5}\\]\\}").unwrap(), + // Edge-case subscript lengths (0, 1, 2 chars) + Just("${arr[\"]}".to_string()), + Just("${arr[']}".to_string()), + ] +} + // Strategy for generating resource-intensive scripts fn resource_stress_strategy() -> impl Strategy { prop_oneof![ @@ -126,6 +154,63 @@ proptest! { }); } + /// Arithmetic evaluator must not panic on multi-byte input + /// Regression: char-index used as byte-index caused panics on multi-byte chars + #[test] + fn arithmetic_multibyte_no_panic(expr in arithmetic_multibyte_strategy()) { + thread_local! { + static RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + } + + let script = format!("echo $(({expr}))"); + + RT.with(|rt| { + rt.block_on(async { + let limits = ExecutionLimits::new() + .max_commands(10) + .timeout(Duration::from_millis(50)); + + let mut bash = Bash::builder().limits(limits).build(); + let _ = bash.exec(&script).await; + }); + }); + } + + /// Parser must not panic on degenerate array subscripts + /// Regression: single-char quote in subscript caused begin > end slice panic + #[test] + fn parser_subscript_no_panic(input in array_subscript_strategy()) { + thread_local! { + static RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + } + + let script = format!("arr=(a b c); echo {input}"); + + RT.with(|rt| { + rt.block_on(async { + let limits = ExecutionLimits::new() + .max_commands(10) + .timeout(Duration::from_millis(50)); + + let mut bash = Bash::builder().limits(limits).build(); + let _ = bash.exec(&script).await; + }); + }); + } + + /// Lexer must not panic on multi-byte input (extends lexer_never_panics with unicode) + #[test] + fn lexer_multibyte_no_panic(input in proptest::string::string_regex("[a-zA-Z0-9_ ;|$()\"'éèüöñ你好🎉]{0,50}").unwrap()) { + let mut lexer = bashkit::parser::Lexer::new(&input); + while lexer.next_token().is_some() {} + } + /// Variable expansion should not execute code #[test] fn variable_expansion_safe(var_content in "[^']{0,100}") {