From e32d58a7cacd3ee0a2b86f5d283c31070a37dc6d Mon Sep 17 00:00:00 2001 From: Agent Smith Date: Thu, 27 Mar 2025 17:57:52 +0800 Subject: [PATCH] feat: add index out of bounds check --- src/ast/builder/llvmbuilder.rs | 16 +++++++- src/ast/builder/mod.rs | 3 ++ src/ast/builder/no_op_builder.rs | 12 ++++++ src/ast/node/primary.rs | 65 ++++++++++++++++++++++++++++++++ src/ast/test.rs | 52 +++++++++++++++++++++++++ src/utils/read_config.rs | 4 ++ test/Kagari.toml | 1 + test/arr_bounds/Kagari.toml | 4 ++ test/arr_bounds/main.pi | 5 +++ vm/src/lib.rs | 11 ++++++ 10 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 test/arr_bounds/Kagari.toml create mode 100644 test/arr_bounds/main.pi diff --git a/src/ast/builder/llvmbuilder.rs b/src/ast/builder/llvmbuilder.rs index e2bc400ee..585023878 100644 --- a/src/ast/builder/llvmbuilder.rs +++ b/src/ast/builder/llvmbuilder.rs @@ -1751,6 +1751,20 @@ impl<'a, 'ctx> IRBuilder<'a, 'ctx> for LLVMBuilder<'a, 'ctx> { let f = self.get_llvm_value(f).unwrap().into_function_value(); f.get_name().to_str().unwrap() == "main" } + + fn build_global_string_ptr(&self, s: &str, name: &str) -> ValueHandle { + let s = self.builder.build_global_string_ptr(s, name).unwrap(); + self.get_llvm_value_handle(&s.as_any_value_enum()) + } + + fn build_unreachable(&self) { + _ = self.builder.build_unreachable(); + } + + fn is_debug(&self) -> bool { + self.debug + } + fn tag_generator_ctx_as_root(&self, f: ValueHandle, ctx: &mut Ctx<'a>) { let f = self.get_llvm_value(f).unwrap().into_function_value(); let allocab = f.get_first_basic_block().unwrap(); @@ -2371,7 +2385,7 @@ impl<'a, 'ctx> IRBuilder<'a, 'ctx> for LLVMBuilder<'a, 'ctx> { if *self.optimized.borrow() { return; } - if !self.debug { + if !self.debug && self.optlevel as u32 >= 1 { self.module.strip_debug_info(); } self.module.verify().unwrap_or_else(|e| { diff --git a/src/ast/builder/mod.rs b/src/ast/builder/mod.rs index 7dc44ead0..9f6f6fe99 100644 --- a/src/ast/builder/mod.rs +++ 
b/src/ast/builder/mod.rs @@ -39,6 +39,7 @@ pub trait IRBuilder<'a, 'ctx> { fn get_global_var_handle(&self, name: &str) -> Option; fn new_subscope(&self, start: Pos); fn get_sp_handle(&self) -> ValueHandle; + fn is_debug(&self) -> bool; fn add_global( &self, name: &str, @@ -329,6 +330,8 @@ pub trait IRBuilder<'a, 'ctx> { fn is_main(&self, f: ValueHandle) -> bool; fn await_task(&self, ctx: &mut Ctx<'a>, task: ValueHandle) -> ValueHandle; fn await_ret(&self, ctx: &mut Ctx<'a>, ret: ValueHandle); + fn build_global_string_ptr(&self, s: &str, name: &str) -> ValueHandle; + fn build_unreachable(&self); } /// ValueHandle is an index used to separate the low level generatted code inside [BuilderEnum] from the respective high level ast node diff --git a/src/ast/builder/no_op_builder.rs b/src/ast/builder/no_op_builder.rs index 30765d533..45eaf19c9 100644 --- a/src/ast/builder/no_op_builder.rs +++ b/src/ast/builder/no_op_builder.rs @@ -631,4 +631,16 @@ impl<'a, 'ctx> IRBuilder<'a, 'ctx> for NoOpBuilder<'a, 'ctx> { 0 } fn await_ret(&self, _ctx: &mut Ctx<'a>, _ret: ValueHandle) {} + + fn is_debug(&self) -> bool { + false + } + + fn build_global_string_ptr(&self, _s: &str, _name: &str) -> ValueHandle { + 0 + } + + fn build_unreachable(&self) { + // Intentionally does nothing + } } diff --git a/src/ast/node/primary.rs b/src/ast/node/primary.rs index 1d62bf2bb..a9375b485 100644 --- a/src/ast/node/primary.rs +++ b/src/ast/node/primary.rs @@ -1,3 +1,4 @@ +use std::cell::RefCell; use std::sync::Arc; use super::node_result::NodeResultBuilder; @@ -5,6 +6,7 @@ use super::*; use crate::ast::builder::BuilderEnum; use crate::ast::builder::IRBuilder; +use crate::ast::builder::IntPredicate; use crate::ast::builder::ValueHandle; use crate::ast::ctx::Ctx; use crate::ast::ctx::MacroReplaceNode; @@ -366,6 +368,69 @@ impl Node for ArrayElementNode { let elemptr: ValueHandle = { let index: &[ValueHandle; 1] = &[index_val]; + + // Emit a bounds check when the assert_index_out_of_bounds config flag is set + if ctx.config.assert_index_out_of_bounds { + // Load the array size + let size_ptr = 
builder + .build_struct_gep(arr, 2, "size_ptr", &pltype.borrow(), ctx) + .unwrap(); + let size = builder.build_load( + size_ptr, + "arr_size", + &PLType::Primitive(PriType::I64), + ctx, + ); + + // Build the comparisons: index >= size or index < 0 + let cmp_ge = builder.build_int_compare( + IntPredicate::SGE, + index_val, + size, + "index_ge_size", + ); + let cmp_lt = builder.build_int_compare( + IntPredicate::SLT, + index_val, + builder.int_value(&PriType::I64, 0, true), + "index_lt_zero", + ); + let out_of_bounds = builder.build_or(cmp_ge, cmp_lt, "out_of_bounds"); + + // Get the current function + let current_func = ctx.function.unwrap(); + let error_block = builder.append_basic_block(current_func, "arr_index_error"); + let continue_block = + builder.append_basic_block(current_func, "arr_index_continue"); + + builder.build_conditional_branch(out_of_bounds, error_block, continue_block); + + // Error-handling block + builder.position_at_end_block(error_block); + + // Look up the pl_index_out_of_bounds function, declaring it if it is not found + let printf_fn = builder + .get_function("pl_index_out_of_bounds") + .unwrap_or_else(|| { + let ret_type = PLType::Void; + let param_type = PLType::Primitive(PriType::I64); + builder.add_function( + "pl_index_out_of_bounds", + &[param_type.clone(), param_type], + ret_type, + ctx, + ) + }); + + // Call pl_index_out_of_bounds with the index and the array size + builder.build_call(printf_fn, &[index_val, size], &PLType::Void, ctx, None); + + builder.build_unreachable(); + + // Continue block: element access proceeds from here + builder.position_at_end_block(continue_block); + } + let real_arr: ValueHandle = builder .build_struct_gep(arr, 1, "real_arr", &pltype.borrow(), ctx) .unwrap(); diff --git a/src/ast/test.rs b/src/ast/test.rs index 4d2077f9a..f242ad0d7 100644 --- a/src/ast/test.rs +++ b/src/ast/test.rs @@ -653,6 +653,58 @@ fn test_tail_call_opt() { drop(l); } +#[test] +fn test_assert_index_out_of_bounds() { + let l = crate::utils::plc_new::tests::TEST_COMPILE_MUTEX + .lock() + .unwrap(); + set_test_asset(); + let out = "testout3"; + let exe = PathBuf::from(out); + #[cfg(target_os = "windows")] + let exe = 
exe.with_extension("exe"); + _ = remove_file(&exe); + use std::{path::PathBuf, process::Command}; + + use crate::ast::compiler::{compile, Options}; + + let docs = MemDocs::default(); + let db = Database::default(); + let input = MemDocsInput::new( + &db, + Arc::new(Mutex::new(docs)), + "test/arr_bounds/main.pi".to_string(), + Default::default(), + ActionType::Compile, + None, + None, + ); + compile( + &db, + input, + out.to_string(), + Options { + optimization: crate::ast::compiler::HashOptimizationLevel::Less, + genir: true, + printast: false, + flow: false, + fmt: false, + jit: false, + debug: false, + ..Default::default() + }, + ); + let exe = crate::utils::canonicalize(&exe) + .unwrap_or_else(|_| panic!("static compiled file not found {:?}", exe)); + eprintln!("exec: {:?}", exe); + let o = Command::new(exe.to_str().unwrap()) + .output() + .expect("failed to execute compiled program"); + // The compiled program indexes out of bounds, so its exit status must be non-zero + assert!(!o.status.success(), "should trigger index out of bounds"); + drop(l); +} + #[cfg(test)] pub(crate) fn set_test_asset() { use std::time::SystemTime; diff --git a/src/utils/read_config.rs b/src/utils/read_config.rs index 7bea4858b..5fc020cf1 100644 --- a/src/utils/read_config.rs +++ b/src/utils/read_config.rs @@ -84,6 +84,10 @@ pub struct Config { /// and it's decided by the position of kagari.toml file #[serde(skip)] pub root: String, + + /// Whether to emit runtime index-out-of-bounds checks on array element access; defaults to false + #[serde(default)] + pub assert_index_out_of_bounds: bool, } /// ConfigWrapper wraps a config, which represents all configuration of an entry node of a program. 
diff --git a/test/Kagari.toml b/test/Kagari.toml index f25c797f0..e363fbd00 100644 --- a/test/Kagari.toml +++ b/test/Kagari.toml @@ -1,6 +1,7 @@ project = "project1" entry = "main.pi" +assert_index_out_of_bounds = true [deps] project2 = { path = "project2" } diff --git a/test/arr_bounds/Kagari.toml b/test/arr_bounds/Kagari.toml new file mode 100644 index 000000000..dffddfc58 --- /dev/null +++ b/test/arr_bounds/Kagari.toml @@ -0,0 +1,4 @@ +entry = "main.pi" +project = "arr_bounds" + +assert_index_out_of_bounds = true diff --git a/test/arr_bounds/main.pi b/test/arr_bounds/main.pi new file mode 100644 index 000000000..3c12ca733 --- /dev/null +++ b/test/arr_bounds/main.pi @@ -0,0 +1,5 @@ +fn main() i64 { + let a = [1]; + let d = a[1]; + return 0; +} diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 36e21c324..f96d4f00d 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -41,6 +41,17 @@ fn pl_panic() { exit(1); } +#[is_runtime] +fn pl_index_out_of_bounds(index: i64, len: i64) { + println!( + "index out of bounds occurred! index: {}, len: {}", + index, len + ); + let bt = Backtrace::new(); + println!("{:?}", bt); + exit(1); +} + #[is_runtime] fn __cast_panic() { println!("invalid cast occured!");