From e13c37119508b1d556b4e944df68ff6a461dfdd8 Mon Sep 17 00:00:00 2001 From: Donald Filimon Date: Mon, 30 Jun 2025 11:12:33 -0400 Subject: [PATCH] Restore run step and add OuroLang prototype --- build.zig | 33 +-- ouro_lang.cc | 620 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.zig | 8 +- 3 files changed, 631 insertions(+), 30 deletions(-) create mode 100644 ouro_lang.cc diff --git a/build.zig b/build.zig index 6b3c68115..cf1d4b578 100644 --- a/build.zig +++ b/build.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); @@ -19,7 +20,7 @@ pub fn build(b: *std.Build) void { const exe = b.addExecutable(.{ .name = "abi", - .root_source_file = .{ .path = "src/main.zig" }, + .root_source_file = b.path("src/main.zig"), .target = target, .optimize = platform_optimize, }); @@ -28,14 +29,10 @@ pub fn build(b: *std.Build) void { exe.link_function_sections = true; exe.link_gc_sections = true; if (platform_optimize == .ReleaseSmall or platform_optimize == .ReleaseFast) { - exe.strip = true; + exe.root_module.strip = true; } - // ─── Dependencies ──────────────────────────────────────────────────────── - exe.root_module.addImport("zli", b.dependency("zli", .{}).module("root")); - exe.root_module.addImport("zf", b.dependency("zf", .{}).module("root")); - exe.root_module.addImport("json", b.dependency("json", .{}).module("json")); - exe.root_module.addImport("prompter", b.dependency("prompter", .{}).module("prompter")); + // No external dependencies currently required. exe.root_module.addOptions("build_options", options); // ─── Platform-specific dependencies ────────────────────────────────────── @@ -61,6 +58,12 @@ pub fn build(b: *std.Build) void { b.installArtifact(exe); + const run_cmd = b.addRunArtifact(exe); + run_cmd.step.dependOn(b.getInstallStep()); + + const run_step = b.step("run", "Run the application"); + run_step.dependOn(&run_cmd.step); + const bench_step = b.step("bench", "Run performance benchmarks"); const bench_exe = b.addRunArtifact(exe); bench_exe.addArg("bench"); @@ -115,20 +118,4 @@ fn detectSIMDSupport() bool { .aarch64 => std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon), else => false, }; - - const run_cmd = b.addRunArtifact(exe); - run_cmd.step.dependOn(b.getInstallStep()); - - const run_step = b.step("run", "Run the application"); - run_step.dependOn(&run_cmd.step); - - const main_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, - .optimize = optimize, - }); - - const run_main_tests = b.addRunArtifact(main_tests); - const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_main_tests.step); } diff --git a/ouro_lang.cc b/ouro_lang.cc new file mode 100644 index 000000000..3da132d92 --- /dev/null +++ b/ouro_lang.cc @@ -0,0 +1,620 @@ +// ouro_lang.cc - OuroLang implementation +// Provided by user; integrated into repository. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Token definitions and structures +enum class TokenType { + LET, FN, IF, ELSE, RETURN, FOR, IN, ASYNC, AWAIT, GPU, + INT, FLOAT, STRING, IDENTIFIER, NUMBER, STRING_LITERAL, + COLON, EQUALS, LPAREN, RPAREN, LBRACE, RBRACE, SEMICOLON, COMMA, + PLUS, MINUS, MUL, DIV, GT, DOTDOT, ARROW, EOF_TOKEN +}; + +struct Token { + TokenType type; + std::string value; + int line; +}; + +class Lexer { + std::string source; + size_t pos = 0; + int line = 1; + +public: + explicit Lexer(const std::string &src) : source(src) {} + + std::vector tokenize() { + std::vector tokens; + while (pos < source.size()) { + char c = source[pos]; + if (std::isspace(static_cast(c))) { + if (c == '\n') line++; + pos++; + continue; + } + if (std::isalpha(static_cast(c)) || c == '_') { + tokens.push_back(parse_identifier()); + } else if (std::isdigit(static_cast(c)) || c == '.') { + tokens.push_back(parse_number()); + } else if (c == '"') { + tokens.push_back(parse_string()); + } else { + tokens.push_back(parse_symbol()); + } + } + tokens.push_back({TokenType::EOF_TOKEN, "", line}); + return tokens; + } + +private: + Token parse_identifier() { + std::string value; + while (pos < source.size() && + (std::isalnum(static_cast(source[pos])) || + source[pos] == '_')) { + value += source[pos++]; + } + if (value == "let") return {TokenType::LET, value, line}; + if (value == "fn") return {TokenType::FN, value, line}; + if (value == "if") return {TokenType::IF, value, line}; + if (value == "else") return {TokenType::ELSE, value, line}; + if (value == "return") return {TokenType::RETURN, value, line}; + if (value == "for") return {TokenType::FOR, value, line}; + if (value == "in") return {TokenType::IN, value, line}; + if (value == "async") return {TokenType::ASYNC, value, line}; + if (value == "await") return {TokenType::AWAIT, value, line}; + if (value == "gpu") return {TokenType::GPU, value, line}; + if (value == "int") return {TokenType::INT, value, line}; + if (value == "float") return {TokenType::FLOAT, value, line}; + if (value == "string") return {TokenType::STRING, value, line}; + return {TokenType::IDENTIFIER, value, line}; + } + + Token parse_number() { + std::string value; + bool has_dot = false; + while (pos < source.size() && (std::isdigit(static_cast(source[pos])) || source[pos] == '.')) { + if (source[pos] == '.') has_dot = true; + value += source[pos++]; + } + return {TokenType::NUMBER, value, line}; + } + + Token parse_string() { + std::string value; + pos++; // Skip opening quote + while (pos < source.size() && source[pos] != '"') { + value += source[pos++]; + } + pos++; // Skip closing quote + return {TokenType::STRING_LITERAL, value, line}; + } + + Token parse_symbol() { + char c = source[pos++]; + switch (c) { + case ':': return {TokenType::COLON, ":", line}; + case '=': return {TokenType::EQUALS, "=", line}; + case '(': return {TokenType::LPAREN, "(", line}; + case ')': return {TokenType::RPAREN, ")", line}; + case '{': return {TokenType::LBRACE, "{", line}; + case '}': return {TokenType::RBRACE, "}", line}; + case ';': return {TokenType::SEMICOLON, ";", line}; + case ',': return {TokenType::COMMA, ",", line}; + case '+': return {TokenType::PLUS, "+", line}; + case '-': + if (pos < source.size() && source[pos] == '>') { + pos++; + return {TokenType::ARROW, "->", line}; + } + return {TokenType::MINUS, "-", line}; + case '*': return {TokenType::MUL, "*", line}; + case '/': return {TokenType::DIV, "/", line}; + case '>': return {TokenType::GT, ">", line}; + case '.': + if (pos < source.size() && source[pos] == '.') { + pos++; + return {TokenType::DOTDOT, "..", line}; + } + default: + throw std::runtime_error("Unknown symbol at line " + std::to_string(line)); + } + } +}; + +// Forward declarations for AST structures +struct Expr; +struct Stmt; +using ExprPtr = std::unique_ptr; +using StmtPtr = std::unique_ptr; + +// Expression node types +struct NumberExpr { double value; }; +struct StringExpr { std::string value; }; +struct IdentExpr { std::string name; }; +struct BinaryExpr { TokenType op; ExprPtr left; ExprPtr right; }; +struct CallExpr { std::string name; std::vector args; }; +struct AwaitExpr { ExprPtr expr; }; + +using ExprVariant = std::variant; +struct Expr { ExprVariant value; }; + +// Statement node types +struct VarDeclStmt { std::string name; std::string type; ExprPtr value; }; +struct FnDeclStmt { + std::string name; + std::vector> params; + std::string return_type; + std::vector body; + bool is_async; + bool is_gpu; + bool is_generic; + std::vector generic_params; +}; +struct IfStmt { ExprPtr condition; std::vector then_branch; std::vector else_branch; }; +struct ForStmt { std::string var; ExprPtr start; ExprPtr end; std::vector body; }; +struct ReturnStmt { ExprPtr value; }; + +using StmtVariant = std::variant; +struct Stmt { StmtVariant value; }; + +class Parser { + std::vector tokens; + size_t pos = 0; + +public: + explicit Parser(const std::vector &t) : tokens(t) {} + + std::vector parse() { + std::vector stmts; + while (tokens[pos].type != TokenType::EOF_TOKEN) { + stmts.push_back(parse_stmt()); + } + return stmts; + } + +private: + Token peek() const { return tokens[pos]; } + Token advance() { return tokens[pos++]; } + Token consume(TokenType type, const std::string &msg) { + if (peek().type == type) return advance(); + throw std::runtime_error(msg + " at line " + std::to_string(peek().line)); + } + + StmtPtr parse_stmt() { + if (peek().type == TokenType::LET) return parse_var_decl(); + if (peek().type == TokenType::FN || peek().type == TokenType::ASYNC || peek().type == TokenType::GPU) { + return parse_fn_decl(); + } + if (peek().type == TokenType::IF) return parse_if_stmt(); + if (peek().type == TokenType::FOR) return parse_for_stmt(); + if (peek().type == TokenType::RETURN) return parse_return_stmt(); + throw std::runtime_error("Unexpected token at line " + std::to_string(peek().line)); + } + + StmtPtr parse_var_decl() { + consume(TokenType::LET, "Expected 'let'"); + auto name = consume(TokenType::IDENTIFIER, "Expected identifier").value; + std::string type; + if (peek().type == TokenType::COLON) { + consume(TokenType::COLON, "Expected ':'"); + type = consume(TokenType::IDENTIFIER, "Expected type").value; + } + consume(TokenType::EQUALS, "Expected '='"); + auto value = parse_expr(); + consume(TokenType::SEMICOLON, "Expected ';'"); + return std::make_unique(VarDeclStmt{name, type, std::move(value)}); + } + + StmtPtr parse_fn_decl() { + bool is_async = false, is_gpu = false, is_generic = false; + std::vector generic_params; + if (peek().type == TokenType::ASYNC) { + consume(TokenType::ASYNC, ""); + is_async = true; + } else if (peek().type == TokenType::GPU) { + consume(TokenType::GPU, ""); + is_gpu = true; + } + consume(TokenType::FN, "Expected 'fn'"); + auto name = consume(TokenType::IDENTIFIER, "Expected identifier").value; + if (peek().type == TokenType::GT) { + consume(TokenType::GT, "Expected '<'"); + while (peek().type != TokenType::GT) { + generic_params.push_back(consume(TokenType::IDENTIFIER, "Expected generic param").value); + if (peek().type == TokenType::COMMA) consume(TokenType::COMMA, ""); + } + consume(TokenType::GT, "Expected '>'"); + is_generic = true; + } + consume(TokenType::LPAREN, "Expected '('"); + std::vector> params; + if (peek().type != TokenType::RPAREN) { + do { + auto param_name = consume(TokenType::IDENTIFIER, "Expected param name").value; + consume(TokenType::COLON, "Expected ':'"); + auto param_type = consume(TokenType::IDENTIFIER, "Expected param type").value; + params.push_back({param_name, param_type}); + if (peek().type == TokenType::COMMA) consume(TokenType::COMMA, ""); + } while (peek().type != TokenType::RPAREN); + } + consume(TokenType::RPAREN, "Expected ')'"); + std::string return_type; + if (peek().type == TokenType::ARROW) { + consume(TokenType::ARROW, "Expected '->'"); + return_type = consume(TokenType::IDENTIFIER, "Expected return type").value; + } + consume(TokenType::LBRACE, "Expected '{'"); + std::vector body; + while (peek().type != TokenType::RBRACE) { + body.push_back(parse_stmt()); + } + consume(TokenType::RBRACE, "Expected '}'"); + return std::make_unique(FnDeclStmt{name, params, return_type, std::move(body), + is_async, is_gpu, is_generic, generic_params}); + } + + StmtPtr parse_if_stmt() { + consume(TokenType::IF, "Expected 'if'"); + auto condition = parse_expr(); + consume(TokenType::LBRACE, "Expected '{'"); + std::vector then_branch; + while (peek().type != TokenType::RBRACE && peek().type != TokenType::ELSE) { + then_branch.push_back(parse_stmt()); + } + consume(TokenType::RBRACE, "Expected '}'"); + std::vector else_branch; + if (peek().type == TokenType::ELSE) { + consume(TokenType::ELSE, ""); + consume(TokenType::LBRACE, "Expected '{'"); + while (peek().type != TokenType::RBRACE) { + else_branch.push_back(parse_stmt()); + } + consume(TokenType::RBRACE, "Expected '}'"); + } + return std::make_unique(IfStmt{std::move(condition), std::move(then_branch), std::move(else_branch)}); + } + + StmtPtr parse_for_stmt() { + consume(TokenType::FOR, "Expected 'for'"); + auto var = consume(TokenType::IDENTIFIER, "Expected loop variable").value; + consume(TokenType::IN, "Expected 'in'"); + auto start = parse_expr(); + consume(TokenType::DOTDOT, "Expected '..'"); + auto end = parse_expr(); + consume(TokenType::LBRACE, "Expected '{'"); + std::vector body; + while (peek().type != TokenType::RBRACE) { + body.push_back(parse_stmt()); + } + consume(TokenType::RBRACE, "Expected '}'"); + return std::make_unique(ForStmt{var, std::move(start), std::move(end), std::move(body)}); + } + + StmtPtr parse_return_stmt() { + consume(TokenType::RETURN, "Expected 'return'"); + ExprPtr value; + if (peek().type != TokenType::SEMICOLON) { + value = parse_expr(); + } + consume(TokenType::SEMICOLON, "Expected ';'"); + return std::make_unique(ReturnStmt{std::move(value)}); + } + + ExprPtr parse_expr() { return parse_binary_expr(0); } + + ExprPtr parse_binary_expr(int precedence) { + auto left = parse_primary_expr(); + while (true) { + TokenType op = peek().type; + int op_precedence = get_precedence(op); + if (op_precedence <= precedence) break; + advance(); + auto right = parse_binary_expr(op_precedence); + left = std::make_unique(BinaryExpr{op, std::move(left), std::move(right)}); + } + return left; + } + + int get_precedence(TokenType op) { + switch (op) { + case TokenType::MUL: + case TokenType::DIV: return 2; + case TokenType::PLUS: + case TokenType::MINUS: return 1; + case TokenType::GT: return 0; + default: return -1; + } + } + + ExprPtr parse_primary_expr() { + if (peek().type == TokenType::NUMBER) { + double val = std::stod(consume(TokenType::NUMBER, "Expected number").value); + return std::make_unique(NumberExpr{val}); + } + if (peek().type == TokenType::STRING_LITERAL) { + auto val = consume(TokenType::STRING_LITERAL, "Expected string").value; + return std::make_unique(StringExpr{val}); + } + if (peek().type == TokenType::IDENTIFIER) { + auto name = consume(TokenType::IDENTIFIER, "Expected identifier").value; + if (peek().type == TokenType::LPAREN) { + consume(TokenType::LPAREN, "Expected '('"); + std::vector args; + if (peek().type != TokenType::RPAREN) { + do { + args.push_back(parse_expr()); + if (peek().type == TokenType::COMMA) consume(TokenType::COMMA, ""); + } while (peek().type != TokenType::RPAREN); + } + consume(TokenType::RPAREN, "Expected ')'"); + return std::make_unique(CallExpr{name, std::move(args)}); + } + return std::make_unique(IdentExpr{name}); + } + if (peek().type == TokenType::AWAIT) { + consume(TokenType::AWAIT, "Expected 'await'"); + auto expr = parse_expr(); + return std::make_unique(AwaitExpr{std::move(expr)}); + } + throw std::runtime_error("Expected expression at line " + std::to_string(peek().line)); + } +}; + +class TypeChecker { + std::map env; + std::map functions; + +public: + void check(const std::vector &stmts) { + for (const auto &stmt : stmts) { + check_stmt(*stmt); + } + } + +private: + void check_stmt(const Stmt &stmt) { + if (auto *var = std::get_if(&stmt.value)) { + auto inferred_type = infer_type(var->value.get()); + if (!var->type.empty() && var->type != inferred_type) { + throw std::runtime_error("Type mismatch for " + var->name); + } + env[var->name] = var->type.empty() ? inferred_type : var->type; + } else if (auto *fn = std::get_if(&stmt.value)) { + functions[fn->name] = fn; + auto saved = env; + for (const auto ¶m : fn->params) { + env[param.first] = param.second; + } + for (const auto &body_stmt : fn->body) { + check_stmt(*body_stmt); + } + env = saved; + } else if (auto *if_stmt = std::get_if(&stmt.value)) { + if (infer_type(if_stmt->condition.get()) != "int") { + throw std::runtime_error("If condition must be int"); + } + for (const auto &s : if_stmt->then_branch) check_stmt(*s); + for (const auto &s : if_stmt->else_branch) check_stmt(*s); + } else if (auto *for_stmt = std::get_if(&stmt.value)) { + if (infer_type(for_stmt->start.get()) != "int" || infer_type(for_stmt->end.get()) != "int") { + throw std::runtime_error("For loop bounds must be int"); + } + env[for_stmt->var] = "int"; + for (const auto &s : for_stmt->body) check_stmt(*s); + } else if (auto *ret = std::get_if(&stmt.value)) { + if (ret->value) { + /* additional checks could go here */ + } + } + } + + std::string infer_type(const Expr *expr) { + if (std::holds_alternative(expr->value)) { + return "float"; + } else if (std::holds_alternative(expr->value)) { + return "string"; + } else if (auto *ident = std::get_if(&expr->value)) { + auto it = env.find(ident->name); + if (it != env.end()) return it->second; + throw std::runtime_error("Undefined variable: " + ident->name); + } else if (auto *bin = std::get_if(&expr->value)) { + auto left_type = infer_type(bin->left.get()); + auto right_type = infer_type(bin->right.get()); + if (left_type != right_type) throw std::runtime_error("Type mismatch in binary op"); + if (bin->op == TokenType::GT) return "int"; + return left_type; + } else if (auto *call = std::get_if(&expr->value)) { + auto it = functions.find(call->name); + if (it != functions.end()) { + return it->second->return_type; + } + throw std::runtime_error("Undefined function: " + call->name); + } + return "unknown"; + } +}; + +class Interpreter { + using Value = std::variant; + using NativeFn = std::function &, Interpreter &)>; + +public: + std::map> env; + Value return_value; + + Interpreter() { + env["print"] = NativeFn{[](const std::vector &args, Interpreter &) -> Value { + for (const auto &arg : args) { + if (std::holds_alternative(arg)) { + std::cout << std::get(arg) << ' '; + } else if (std::holds_alternative(arg)) { + std::cout << std::get(arg) << ' '; + } + } + std::cout << std::endl; + return {}; + }}; + + env["sleep"] = NativeFn{[](const std::vector &args, Interpreter &) -> Value { + if (!args.empty() && std::holds_alternative(args[0])) { + auto ms = static_cast(std::get(args[0])); + std::this_thread::sleep_for(std::chrono::milliseconds(ms)); + } + return {}; + }}; + } + + void run(const std::string &source) { + Lexer lexer(source); + auto tokens = lexer.tokenize(); + Parser parser(tokens); + auto ast = parser.parse(); + TypeChecker checker; + checker.check(ast); + for (const auto &stmt : ast) { + execute_stmt(*stmt); + } + } + +private: + void execute_stmt(const Stmt &stmt) { + if (auto *var = std::get_if(&stmt.value)) { + env[var->name] = evaluate_expr(*var->value); + } else if (auto *fn = std::get_if(&stmt.value)) { + auto func = [this, fn](const std::vector &args, Interpreter &interpreter) -> Value { + std::map> fn_env = env; + if (args.size() != fn->params.size()) { + throw std::runtime_error("Argument count mismatch"); + } + for (size_t i = 0; i < args.size(); ++i) { + fn_env[fn->params[i].first] = args[i]; + } + if (fn->is_async) { + auto future = std::async(std::launch::async, [fn, fn_env, &interpreter]() mutable { + Interpreter local_interp; + local_interp.env = fn_env; + for (const auto &body_stmt : fn->body) { + local_interp.execute_stmt(*body_stmt); + } + return local_interp.return_value; + }); + return future.get(); + } else if (fn->is_gpu) { + std::cout << "GPU function " << fn->name << " called (placeholder)\n"; + return {}; + } else { + Interpreter local_interp; + local_interp.env = fn_env; + for (const auto &body_stmt : fn->body) { + local_interp.execute_stmt(*body_stmt); + } + return local_interp.return_value; + } + }; + env[fn->name] = func; + } else if (auto *if_stmt = std::get_if(&stmt.value)) { + auto cond = evaluate_expr(*if_stmt->condition); + bool is_true = false; + if (std::holds_alternative(cond)) { + is_true = std::get(cond) != 0; + } + if (is_true) { + for (const auto &s : if_stmt->then_branch) execute_stmt(*s); + } else { + for (const auto &s : if_stmt->else_branch) execute_stmt(*s); + } + } else if (auto *for_stmt = std::get_if(&stmt.value)) { + auto start_val = evaluate_expr(*for_stmt->start); + auto end_val = evaluate_expr(*for_stmt->end); + if (std::holds_alternative(start_val) && std::holds_alternative(end_val)) { + int s = static_cast(std::get(start_val)); + int e = static_cast(std::get(end_val)); + for (int i = s; i < e; ++i) { + env[for_stmt->var] = static_cast(i); + for (const auto &s : for_stmt->body) { + execute_stmt(*s); + } + } + } + } else if (auto *ret = std::get_if(&stmt.value)) { + if (ret->value) { + return_value = evaluate_expr(*ret->value); + } else { + return_value = {}; + } + } + } + + Value evaluate_expr(const Expr &expr) { + if (auto *num = std::get_if(&expr.value)) { + return num->value; + } else if (auto *str = std::get_if(&expr.value)) { + return str->value; + } else if (auto *ident = std::get_if(&expr.value)) { + auto it = env.find(ident->name); + if (it != env.end()) return std::get(it->second); + throw std::runtime_error("Undefined variable: " + ident->name); + } else if (auto *bin = std::get_if(&expr.value)) { + auto left = evaluate_expr(*bin->left); + auto right = evaluate_expr(*bin->right); + if (std::holds_alternative(left) && std::holds_alternative(right)) { + double l = std::get(left); + double r = std::get(right); + switch (bin->op) { + case TokenType::PLUS: return l + r; + case TokenType::MINUS: return l - r; + case TokenType::MUL: return l * r; + case TokenType::DIV: return l / r; + case TokenType::GT: return static_cast(l > r); + default: throw std::runtime_error("Invalid operator"); + } + } + } else if (auto *call = std::get_if(&expr.value)) { + auto it = env.find(call->name); + if (it == env.end()) throw std::runtime_error("Undefined function: " + call->name); + auto func = std::get(it->second); + std::vector args; + for (const auto &arg : call->args) { + args.push_back(evaluate_expr(*arg)); + } + return func(args, *this); + } else if (auto *await = std::get_if(&expr.value)) { + return evaluate_expr(*await->expr); // sync for now + } + throw std::runtime_error("Invalid expression"); + } +}; + +void repl() { + Interpreter interp; + std::string line; + std::cout << "OuroLang REPL (type 'exit' to quit)\n"; + while (true) { + std::cout << "> "; + std::getline(std::cin, line); + if (line == "exit") break; + try { + interp.run(line); + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + } + } +} + +int main() { + repl(); + return 0; +} diff --git a/src/main.zig b/src/main.zig index 37e231d66..dad1677f3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -11,12 +11,6 @@ const simd = @import("../zvim/simd_text.zig"); const lockfree = @import("lockfree.zig"); const platform = @import("platform.zig"); -pub fn main() !void { - std.debug.print("zvim starting...\n", .{}); -} - -const std = @import("std"); - pub const Error = error{ EmptyText, BlacklistedWord, @@ -129,4 +123,4 @@ test "Abi orchestrates personas" { const res = try Abi.process(req); try std.testing.expectEqual(@as(usize, 3), res.result); try std.testing.expectEqualStrings("Computation successful", res.message); -} \ No newline at end of file +}