From 56a2e06f0c1c25efc3c0d979ce7bad2253da4b41 Mon Sep 17 00:00:00 2001 From: ivanlele Date: Fri, 20 Mar 2026 18:18:49 +0200 Subject: [PATCH] migrate to logos for human_encoding --- Cargo-recent.lock | 69 +- Cargo.toml | 2 +- src/human_encoding/README.md | 4 +- src/human_encoding/error.rs | 27 +- src/human_encoding/mod.rs | 23 +- src/human_encoding/parse/ast.rs | 1204 +++++++++++++++---------------- 6 files changed, 659 insertions(+), 670 deletions(-) diff --git a/Cargo-recent.lock b/Cargo-recent.lock index 3bc4ca6f..7e37de7f 100644 --- a/Cargo-recent.lock +++ b/Cargo-recent.lock @@ -51,6 +51,12 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32637268377fc7b10a8c6d51de3e7fba1ce5dd371a96e342b34e6078db558e7f" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bitcoin" version = "0.32.8" @@ -160,6 +166,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "getrandom" version = "0.2.16" @@ -238,6 +250,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.178" @@ -254,6 +272,40 @@ dependencies = [ "cc", ] +[[package]] +name = "logos" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" +dependencies = [ + 
"logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" +dependencies = [ + "logos-codegen", +] + [[package]] name = "memchr" version = "2.7.6" @@ -368,6 +420,15 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -427,6 +488,12 @@ dependencies = [ "secp256k1-sys", ] +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" @@ -519,8 +586,8 @@ dependencies = [ "getrandom 0.2.16", "ghost-cell", "hex-conservative 0.2.2", + "logos", "miniscript", - "santiago", "serde", "serde_json", "simplicity-sys 0.6.2", diff --git a/Cargo.toml b/Cargo.toml index 85f09495..76ad80a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ elements = { version = "0.25.0", optional = true, default-features = false } ghost-cell = { version = "0.2.6", default-features = false } hashes = { package = "bitcoin_hashes", version = "0.14" } hex = { package = "hex-conservative", version = "0.2.1" } -santiago = "1.3" +logos 
= "0.15" simplicity-sys = { version = "0.6.2", path = "./simplicity-sys" } serde = { version = "1.0.103", features = ["derive"], optional = true } diff --git a/src/human_encoding/README.md b/src/human_encoding/README.md index 6bfe21e9..7d600846 100644 --- a/src/human_encoding/README.md +++ b/src/human_encoding/README.md @@ -15,9 +15,7 @@ With that said, the rest of the document defines the encoding. ## Syntax -The syntax is defined in `src/human_encoding/parse/ast.rs`. It currently uses the -`santiago` parser generator, but we would like to move away from this, probably to -an ad-hoc parser, to avoid poor asymptotic behavior and to get better error messages. +The syntax is defined in `src/human_encoding/parse/ast.rs`. Comments are started by `--` and end at the next newline. This is the only aspect in which whitespace is significant. diff --git a/src/human_encoding/error.rs b/src/human_encoding/error.rs index 68faeb41..04a1c43e 100644 --- a/src/human_encoding/error.rs +++ b/src/human_encoding/error.rs @@ -2,7 +2,6 @@ //! Parsing Errors -use santiago::lexer::Lexeme; use std::collections::BTreeMap; use std::sync::{Arc, Mutex}; use std::{error, fmt, iter}; @@ -19,22 +18,6 @@ pub struct ErrorSet { errors: BTreeMap, Vec>, } -impl From> for ErrorSet { - fn from(e: santiago::parser::ParseError) -> Self { - let lex = e.at.map(|rc| (*rc).clone()); - match lex.as_ref().map(|lex| &lex.position).map(Position::from) { - Some(pos) => ErrorSet::single(pos, Error::ParseFailed(lex)), - None => ErrorSet::single_no_position(Error::ParseFailed(lex)), - } - } -} - -impl From for ErrorSet { - fn from(e: santiago::lexer::LexerError) -> Self { - ErrorSet::single(e.position, Error::LexFailed(e.message)) - } -} - impl ErrorSet { /// Constructs a new empty error set. 
pub fn new() -> Self { @@ -260,10 +243,10 @@ pub enum Error { NameRepeated(Arc), /// Program did not have a `main` expression NoMain, - /// Parsing failed (the parser provides us some extra information, but beyond - /// the line and column, it does not seem very useful to a user, so we drop it). - ParseFailed(Option), - /// Lexing failed; here santiago provides us an error message which is useful + /// Parsing failed; the string (if any) is a description of the token at which + /// parsing failed. + ParseFailed(Option), + /// Lexing failed LexFailed(String), /// A number was parsed in some context but was out of range. NumberOutOfRange(String), @@ -327,7 +310,7 @@ impl fmt::Display for Error { write!(f, "number {} was out of allowable range", n) } Error::ParseFailed(None) => f.write_str("could not parse"), - Error::ParseFailed(Some(ref lex)) => write!(f, "could not parse `{}`", lex.raw), + Error::ParseFailed(Some(ref raw)) => write!(f, "could not parse `{}`", raw), Error::LexFailed(ref msg) => write!(f, "could not parse: {}", msg), Error::TypeCheck(ref e) => fmt::Display::fmt(e, f), Error::Undefined(ref s) => write!(f, "reference to undefined symbol `{}`", s), diff --git a/src/human_encoding/mod.rs b/src/human_encoding/mod.rs index 4db0b355..4e81963b 100644 --- a/src/human_encoding/mod.rs +++ b/src/human_encoding/mod.rs @@ -24,31 +24,16 @@ pub use self::error::{Error, ErrorSet}; pub use self::named_node::NamedCommitNode; /// Line/column pair -/// -/// There is a similar type provided by the `santiago` library but it does not implement -/// `Copy`, among many other traits, which makes it unergonomic to use. Santiago positions -/// can be converted using `.into()`. 
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Default, Hash)] pub struct Position { line: usize, column: usize, } -impl<'a> From<&'a santiago::lexer::Position> for Position { - fn from(position: &'a santiago::lexer::Position) -> Position { - Position { - line: position.line, - column: position.column, - } - } -} - -impl From for Position { - fn from(position: santiago::lexer::Position) -> Position { - Position { - line: position.line, - column: position.column, - } +impl Position { + /// Create a new position from line and column. + pub fn new(line: usize, column: usize) -> Self { + Position { line, column } } } diff --git a/src/human_encoding/parse/ast.rs b/src/human_encoding/parse/ast.rs index 18de679e..eb5c0c3a 100644 --- a/src/human_encoding/parse/ast.rs +++ b/src/human_encoding/parse/ast.rs @@ -2,16 +2,16 @@ //! Parsing -use std::mem; +use std::str; use std::sync::Arc; +use logos::{Lexer, Logos}; + use crate::human_encoding::{Error, ErrorSet, Position, WitnessOrHole}; use crate::jet::Jet; use crate::value::Word; use crate::{node, types}; -use crate::{BitIter, Cmr, FailEntropy}; -use santiago::grammar::{Associativity, Grammar}; -use santiago::lexer::{Lexeme, LexerRules}; +use crate::{BitIter, FailEntropy}; /// A single non-empty line of a program, of the form x = y :: t /// @@ -106,278 +106,517 @@ impl Type { } } -/// Takes a program as a string and parses it into an AST (actually, a vector -/// of lines, each of which is individually an AST) -pub fn parse_line_vector(input: &str) -> Result>, ErrorSet> { - let lexer_rules = lexer_rules(); - let grammar = grammar::(); - - let lexemes = match santiago::lexer::lex(&lexer_rules, input) { - Ok(lexemes) => lexemes, - Err(err) => return Err(err.into()), - }; - let trees = santiago::parser::parse(&grammar, &lexemes)?; - assert_eq!(trees.len(), 1, "ambiguous parse (this is a bug)"); - match trees[0].as_abstract_syntax_tree() { - Ast::Program(lines) => Ok(lines), - Ast::Error(errs) => Err(errs), - x => 
unreachable!( - "Parsed program into non-program non-error {:?}; this is a bug.", - x - ), - } +/// Token type produced by the logos lexer. +#[derive(Logos, Debug, Clone, PartialEq)] +#[logos(skip r"[ \t\r\n]+")] // skip whitespace +#[logos(skip r"--[^\n]*")] // skip line comments +enum Token { + // Punctuation + #[token(":=")] + Assign, + #[token("->")] + Arrow, + #[token("#{")] + HashBrace, + #[token("(")] + LParen, + #[token(")")] + RParen, + #[token("+")] + Plus, + #[token("*")] + Star, + #[token(":")] + Colon, + #[token("}")] + RBrace, + #[token("?")] + Question, + + // Keywords + #[token("const")] + Const, + #[token("assertl")] + AssertL, + #[token("assertr")] + AssertR, + #[token("fail")] + Fail, + #[token("disconnect")] + Disconnect, + #[token("case")] + Case, + #[token("comp")] + Comp, + #[token("pair")] + Pair, + #[token("injl")] + InjL, + #[token("injr")] + InjR, + #[token("take")] + Take, + #[token("drop")] + Drop, + #[token("unit")] + Unit, + #[token("iden")] + Iden, + #[token("witness")] + Witness, + + // Jet names + #[regex(r"jet_[a-z0-9_]+", |lex| lex.slice().to_owned())] + Jet(String), + + // Literals + #[token("_")] + Underscore, + #[regex(r"0b[01]+", |lex| lex.slice().to_owned())] + BinLiteral(String), + #[regex(r"0x[0-9a-f]+", |lex| lex.slice().to_owned())] + HexLiteral(String), + + // CMR literal + #[regex(r"#[a-fA-F0-9]{64}", |lex| lex.slice().to_owned())] + CmrLiteral(String), + + // Types + #[token("1")] + One, + #[token("2")] + Two, + #[regex(r"2\^[1-9][0-9]*", |lex| lex.slice().to_owned())] + TwoExp(String), + + // Symbols + #[regex(r"[a-zA-Z_\-.'][0-9a-zA-Z_\-.']*", priority = 1, callback = |lex| lex.slice().to_owned())] + Symbol(String), } -/// Check a list of AST elements for errors; if any are errors, combine them and return the result -fn propagate_errors(ast: &[Ast]) -> Option> { - let mut e = ErrorSet::new(); - for elem in ast { - if let Ast::Error(errs) = elem { - e.merge(errs); +/// A token together with its source position 
+#[derive(Debug, Clone)] +struct Spanned { + token: Token, + position: Position, +} + +/// Lex the entire input into a vector of spanned tokens +fn lex_all(input: &str) -> Result, ErrorSet> { + let mut lexer: Lexer<'_, Token> = Token::lexer(input); + let mut tokens = Vec::new(); + while let Some(result) = lexer.next() { + let span = lexer.span(); + // Compute line-column position + let position = offset_to_position(input, span.start); + match result { + Ok(token) => tokens.push(Spanned { token, position }), + Err(()) => { + return Err(ErrorSet::single( + position, + Error::LexFailed(format!( + "unexpected character `{}`", + &input[span.start..span.end] + )), + )); + } } } - if e.is_empty() { - None - } else { - Some(Ast::Error(e)) + Ok(tokens) +} + +/// Convert a byte offset into line-column +fn offset_to_position(input: &str, offset: usize) -> Position { + let mut line: usize = 1; + let mut col: usize = 1; + for (i, ch) in input.char_indices() { + if i >= offset { + break; + } + if ch == '\n' { + line += 1; + col = 1; + } else { + col += 1; + } } + Position::new(line, col) } -/// Main AST structure -/// -/// This structure is never really instantiated; during construction it is -/// continually collapsed until in the end it will be in either the `Program` -/// or `Error` variant. -#[derive(Debug, Clone)] -enum Ast { - Combinator { - comb: node::Inner<(), J, (), WitnessOrHole>, - position: Position, - }, - /// A type->type arrow - Arrow(Option, Option), - /// A #{expr} or #abcd CMR - Cmr(AstCmr), - /// An error occurred during parsing - Error(ErrorSet), - /// An expression - Expression(Expression), - /// A full expression line - Line(Line), - /// A hex or binary literal - Literal { - data: Vec, - bit_length: usize, - position: Position, - }, - /// The top-level program - Program(Vec>), - /// A symbol - Symbol { value: Arc, position: Position }, - /// A type - Type(Option), - /// Dummy value used internally in the parser when building the tree. 
- /// - /// Any parse objects which have no information in themselves (e.g. the - /// plus or star symbols) but which are just used to shape the parse - /// tree, get mapped to this value and then discarded. - Dummy { position: Position }, - /// Dummy value used when manipulating the tree in-place, to replace - /// data that we move out of the tree - Replaced, +struct Parser { + tokens: Vec, + pos: usize, } -impl Ast { - /// Creates an `Ast` from a single sub-AST - fn from_1(toks: &mut [Self], convert: F1, unconvert: F) -> Self - where - F1: FnOnce(&mut Self) -> T, - F: FnOnce(T) -> Self, - { - if let Some(e) = propagate_errors(toks) { - return e; - } - assert_eq!(toks.len(), 1); - unconvert(convert(&mut toks[0])) +impl Parser { + fn new(tokens: Vec) -> Self { + Parser { tokens, pos: 0 } } - /// Creates an `Ast` from two sub-`Ast`s with a dummy in between - fn from_2(toks: &mut [Self], convert1: F1, convert2: F2, unconvert: F) -> Self - where - F1: FnOnce(&mut Self) -> T, - F2: FnOnce(&mut Self) -> U, - F: FnOnce(T, U) -> Self, - { - if let Some(e) = propagate_errors(toks) { - return e; - } - assert_eq!(toks.len(), 3); - toks[1].expect_position(); - unconvert(convert1(&mut toks[0]), convert2(&mut toks[2])) + fn peek(&self) -> Option<&Token> { + self.tokens.get(self.pos).map(|s| &s.token) } - /// Creates an `Ast` from three sub-`Ast`s with dummies in between - fn from_3( - toks: &mut [Self], - convert1: F1, - convert2: F2, - convert3: F3, - unconvert: F, - ) -> Self - where - F1: FnOnce(&mut Self) -> T, - F2: FnOnce(&mut Self) -> U, - F3: FnOnce(&mut Self) -> V, - F: FnOnce(T, U, V) -> Self, - { - if let Some(e) = propagate_errors(toks) { - return e; - } - assert_eq!(toks.len(), 5); - toks[1].expect_position(); - toks[3].expect_position(); - unconvert( - convert1(&mut toks[0]), - convert2(&mut toks[2]), - convert3(&mut toks[4]), - ) + fn current_position(&self) -> Position { + self.tokens + .get(self.pos) + .map(|s| s.position) + .unwrap_or_default() + } + + /// 
Advance and return the consumed spanned token + fn advance(&mut self) -> &Spanned { + let s = &self.tokens[self.pos]; + self.pos += 1; + s } - /// Creates an AST from a combinator with no arguments - fn from_combinator(mut toks: Vec) -> Self { - if let Some(e) = propagate_errors(&toks) { - return e; + /// Consume a token if it matches, returning true on success + fn eat(&mut self, expected: &Token) -> bool { + if self.peek() != Some(expected) { + return false; } + self.pos += 1; + true + } - let (comb, position) = match mem::replace(&mut toks[0], Ast::Replaced) { - Ast::Combinator { comb, position } => (comb, position), - _ => unreachable!(), - }; + /// Consume a token that must match, or return an error + fn expect(&mut self, expected: &Token) -> Result { + if self.peek() != Some(expected) { + return Err(ErrorSet::single( + self.current_position(), + Error::ParseFailed(self.peek_raw_description()), + )); + } + let pos = self.current_position(); + self.pos += 1; + Ok(pos) + } - // This stupid construction is needed to avoid borrowck rules - // around borrowing tok[1] and tok[2] mutably at the same time. - let arcs: Vec> = toks[1..] 
- .iter_mut() - .map(Ast::expect_expression) - .map(Arc::new) - .collect(); - let inner = comb - .map_left_right(|_| Arc::clone(&arcs[0]), |_| Arc::clone(&arcs[1])) - .map_disconnect(|_| Arc::clone(&arcs[1])); - Ast::Expression(Expression { - inner: ExprInner::Inline(inner), - position, - }) + /// Whether we are at the end of input + fn at_end(&self) -> bool { + self.pos >= self.tokens.len() } - /// Creates an AST from a dummy lexeme - fn from_dummy_lexeme(lexemes: &[&std::rc::Rc]) -> Self { - assert_eq!(lexemes.len(), 1); - let position = (&lexemes[0].position).into(); - Ast::Dummy { position } + /// Human-readable description of the current token for error messages + fn peek_raw_description(&self) -> Option { + self.tokens.get(self.pos).map(|s| match &s.token { + Token::Assign => ":=".to_owned(), + Token::Arrow => "->".to_owned(), + Token::HashBrace => "#{".to_owned(), + Token::LParen => "(".to_owned(), + Token::RParen => ")".to_owned(), + Token::Plus => "+".to_owned(), + Token::Star => "*".to_owned(), + Token::Colon => ":".to_owned(), + Token::RBrace => "}".to_owned(), + Token::Question => "?".to_owned(), + Token::Const => "const".to_owned(), + Token::AssertL => "assertl".to_owned(), + Token::AssertR => "assertr".to_owned(), + Token::Fail => "fail".to_owned(), + Token::Disconnect => "disconnect".to_owned(), + Token::Case => "case".to_owned(), + Token::Comp => "comp".to_owned(), + Token::Pair => "pair".to_owned(), + Token::InjL => "injl".to_owned(), + Token::InjR => "injr".to_owned(), + Token::Take => "take".to_owned(), + Token::Drop => "drop".to_owned(), + Token::Unit => "unit".to_owned(), + Token::Iden => "iden".to_owned(), + Token::Witness => "witness".to_owned(), + Token::Jet(ref s) => s.clone(), + Token::Underscore => "_".to_owned(), + Token::BinLiteral(ref s) | Token::HexLiteral(ref s) => s.clone(), + Token::CmrLiteral(ref s) => s.clone(), + Token::One => "1".to_owned(), + Token::Two => "2".to_owned(), + Token::TwoExp(ref s) => s.clone(), + 
Token::Symbol(ref s) => s.clone(), + }) } +} - /// Creates an AST from a lexeme which forms a complete type - fn from_type_lexeme(lexemes: &[&std::rc::Rc]) -> Self { - assert_eq!(lexemes.len(), 1); - let position = &lexemes[0].position; - match lexemes[0].raw.as_str() { - "1" => Ast::Type(Some(Type::One)), - "2" => Ast::Type(Some(Type::Two)), - other => { - assert_eq!(&other[..2], "2^"); - match str::parse::(&other[2..]) { - // TODO how many of these should we support? - Ok(0) => Ast::Type(Some(Type::One)), - Ok(1) => Ast::Type(Some(Type::Two)), - Ok(2) => Ast::Type(Some(Type::TwoTwoN(1))), - Ok(4) => Ast::Type(Some(Type::TwoTwoN(2))), - Ok(8) => Ast::Type(Some(Type::TwoTwoN(3))), - Ok(16) => Ast::Type(Some(Type::TwoTwoN(4))), - Ok(32) => Ast::Type(Some(Type::TwoTwoN(5))), - Ok(64) => Ast::Type(Some(Type::TwoTwoN(6))), - Ok(128) => Ast::Type(Some(Type::TwoTwoN(7))), - Ok(256) => Ast::Type(Some(Type::TwoTwoN(8))), - Ok(512) => Ast::Type(Some(Type::TwoTwoN(9))), - Ok(y) => Ast::Error(ErrorSet::single(position, Error::Bad2ExpNumber(y))), - Err(_) => Ast::Error(ErrorSet::single( - position, - Error::NumberOutOfRange(other.to_owned()), - )), - } - } - } +/// Takes a program as a string and parses it into an AST +pub fn parse_line_vector(input: &str) -> Result>, ErrorSet> { + let tokens = lex_all(input)?; + let mut parser = Parser::new(tokens); + let mut lines = Vec::new(); + while !parser.at_end() { + lines.push(parse_line(&mut parser)?); } + Ok(lines) +} - /// Creates an AST from a combinator lexeme - fn from_combinator_lexeme(lexemes: &[&std::rc::Rc]) -> Self { - assert_eq!(lexemes.len(), 1); - let position = (&lexemes[0].position).into(); - let comb = match lexemes[0].raw.as_str() { - "unit" => node::Inner::Unit, - "iden" => node::Inner::Iden, - "injl" => node::Inner::InjL(()), - "injr" => node::Inner::InjR(()), - "take" => node::Inner::Take(()), - "drop" => node::Inner::Drop(()), - "comp" => node::Inner::Comp((), ()), - "case" => node::Inner::Case((), ()), - 
"assertl" => node::Inner::AssertL((), Cmr::unit()), - "assertr" => node::Inner::AssertR(Cmr::unit(), ()), - "pair" => node::Inner::Pair((), ()), - "disconnect" => node::Inner::Disconnect((), ()), - "witness" => node::Inner::Witness(WitnessOrHole::Witness), - "fail" => node::Inner::Fail(FailEntropy::ZERO), - other => { - assert_eq!(&other[..4], "jet_"); - if let Ok(jet) = J::from_str(&other[4..]) { - node::Inner::Jet(jet) - } else { - return Ast::Error(ErrorSet::single( - position, - Error::UnknownJet(other.to_owned()), - )); - } - } +/// Parse a line +fn parse_line(p: &mut Parser) -> Result, ErrorSet> { + let (name, position) = parse_symbol_value(p)?; + + if p.eat(&Token::Assign) { + // symbol ":=" expr (optionally followed by ":" arrow) + let expr = parse_expr(p)?; + let arrow = if p.eat(&Token::Colon) { + parse_arrow(p)? + } else { + (None, None) }; - Ast::Combinator { comb, position } + return Ok(Line { + position, + name, + expression: Some(expr), + arrow, + }); + } + + if p.eat(&Token::Colon) { + // symbol ":" arrow + let arrow = parse_arrow(p)?; + return Ok(Line { + position, + name, + expression: None, + arrow, + }); } - /// Creates an AST from a literal lexeme - fn from_literal_lexeme(lexemes: &[&std::rc::Rc]) -> Self { - assert_eq!(lexemes.len(), 1); - let position = (&lexemes[0].position).into(); + Err(ErrorSet::single( + p.current_position(), + Error::ParseFailed(p.peek_raw_description()), + )) +} - if lexemes[0].raw == "_" { - return Ast::Literal { - data: vec![], - bit_length: 0, +/// Parse an arrow (type -> type) +fn parse_arrow(p: &mut Parser) -> Result<(Option, Option), ErrorSet> { + let src = parse_type(p)?; + p.expect(&Token::Arrow)?; + let tgt = parse_type(p)?; + Ok((src, tgt)) +} + +/// Parse an expression +fn parse_expr(p: &mut Parser) -> Result, ErrorSet> { + let position = p.current_position(); + + match p.peek().cloned() { + Some(Token::LParen) => { + p.advance(); + let inner = parse_expr(p)?; + p.expect(&Token::RParen)?; + Ok(inner) + } + 
Some(Token::Question) => { + p.advance(); + let (name, sym_pos) = parse_symbol_value(p)?; + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Witness(WitnessOrHole::TypedHole(name))), + position: sym_pos, + }) + } + Some(Token::Const) => { + p.advance(); + let (data, bit_length, lit_pos) = parse_literal(p)?; + let mut iter = BitIter::from(data); + if bit_length.count_ones() != 1 || bit_length > 1 << 31 { + return Err(ErrorSet::single( + lit_pos, + Error::BadWordLength { bit_length }, + )); + } + let word = Word::from_bits(&mut iter, bit_length.trailing_zeros()).unwrap(); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Word(word)), + position: lit_pos, + }) + } + Some(Token::AssertL) => { + p.advance(); + let left = parse_expr(p)?; + let cmr = parse_cmr(p)?; + Ok(Expression { + inner: ExprInner::AssertL(Arc::new(left), cmr), position, - }; + }) } - - let s = &lexemes[0].raw[2..]; - if &lexemes[0].raw[..2] == "0x" { - let bit_length = s.len() * 4; - let mut data = Vec::with_capacity(s.len().div_ceil(2)); - for idx in 0..s.len() / 2 { - data.push(u8::from_str_radix(&s[2 * idx..2 * idx + 2], 16).unwrap()); + Some(Token::AssertR) => { + p.advance(); + let cmr = parse_cmr(p)?; + let right = parse_expr(p)?; + Ok(Expression { + inner: ExprInner::AssertR(cmr, Arc::new(right)), + position, + }) + } + Some(Token::Fail) => { + p.advance(); + let (value, bit_length, lit_pos) = parse_literal(p)?; + if bit_length < 128 { + return Err(ErrorSet::single( + lit_pos, + Error::EntropyInsufficient { bit_length }, + )); } - if s.len() % 2 == 1 { - data.push(u8::from_str_radix(&s[s.len() - 1..], 16).unwrap() << 4); + if bit_length > 512 { + return Err(ErrorSet::single( + lit_pos, + Error::EntropyTooMuch { bit_length }, + )); } - - Ast::Literal { - data, - bit_length, + let mut entropy = [0; 64]; + entropy[..value.len()].copy_from_slice(&value[..]); + let entropy = FailEntropy::from_byte_array(entropy); + Ok(Expression { + inner: 
ExprInner::Inline(node::Inner::Fail(entropy)), position, - } - } else { - assert_eq!(&lexemes[0].raw[..2], "0b"); + }) + } + // Nullary? + Some(Token::Unit) => { + p.advance(); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Unit), + position, + }) + } + Some(Token::Iden) => { + p.advance(); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Iden), + position, + }) + } + Some(Token::Witness) => { + p.advance(); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Witness(WitnessOrHole::Witness)), + position, + }) + } + Some(Token::Jet(ref name)) => { + let jet_name = name.clone(); + p.advance(); + let Ok(jet) = J::from_str(&jet_name[4..]) else { + return Err(ErrorSet::single(position, Error::UnknownJet(jet_name))); + }; + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Jet(jet)), + position, + }) + } + // Unary + Some(Token::InjL) => { + p.advance(); + let child = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::InjL(child)), + position, + }) + } + Some(Token::InjR) => { + p.advance(); + let child = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::InjR(child)), + position, + }) + } + Some(Token::Take) => { + p.advance(); + let child = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Take(child)), + position, + }) + } + Some(Token::Drop) => { + p.advance(); + let child = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Drop(child)), + position, + }) + } + // Binary + Some(Token::Case) => { + p.advance(); + let left = Arc::new(parse_expr(p)?); + let right = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Case(left, right)), + position, + }) + } + Some(Token::Comp) => { + p.advance(); + let left = Arc::new(parse_expr(p)?); + let right = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Comp(left, right)), + position, + }) + } + 
Some(Token::Pair) => { + p.advance(); + let left = Arc::new(parse_expr(p)?); + let right = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Pair(left, right)), + position, + }) + } + Some(Token::Disconnect) => { + p.advance(); + let left = Arc::new(parse_expr(p)?); + let right = Arc::new(parse_expr(p)?); + Ok(Expression { + inner: ExprInner::Inline(node::Inner::Disconnect(left, right)), + position, + }) + } + // Symbol reference + Some(Token::Symbol(_)) | Some(Token::Underscore) => { + let (name, sym_pos) = parse_symbol_value(p)?; + Ok(Expression::reference(name, sym_pos)) + } + _ => Err(ErrorSet::single( + position, + Error::ParseFailed(p.peek_raw_description()), + )), + } +} + +/// Parse a CMR (either an expression in #{} or a literal) +fn parse_cmr(p: &mut Parser) -> Result, ErrorSet> { + if p.eat(&Token::HashBrace) { + let expr = parse_expr(p)?; + p.expect(&Token::RBrace)?; + return Ok(AstCmr::Expr(Arc::new(expr))); + } + if let Some(Token::CmrLiteral(_)) = p.peek() { + p.advance(); + return Ok(AstCmr::Literal); + } + + Err(ErrorSet::single( + p.current_position(), + Error::ParseFailed(p.peek_raw_description()), + )) +} + +/// Parse a literal (underscore, binary, or hex) +fn parse_literal(p: &mut Parser) -> Result<(Vec, usize, Position), ErrorSet> { + let position = p.current_position(); + match p.peek().cloned() { + Some(Token::Underscore) => { + p.advance(); + Ok((vec![], 0, position)) + } + Some(Token::BinLiteral(ref raw)) => { + let s = &raw[2..]; let bit_length = s.len(); let mut data = Vec::with_capacity(s.len().div_ceil(8)); - let mut x = 0; + let mut x: u8 = 0; for (n, ch) in s.chars().enumerate() { match ch { '0' => {} @@ -392,410 +631,127 @@ impl Ast { if s.len() % 8 != 0 { data.push(x); } - - Ast::Literal { - data, - bit_length, - position, + p.advance(); + Ok((data, bit_length, position)) + } + Some(Token::HexLiteral(ref raw)) => { + let s = &raw[2..]; + let bit_length = s.len() * 4; + let mut data = 
Vec::with_capacity(s.len().div_ceil(2)); + for idx in 0..s.len() / 2 { + data.push(u8::from_str_radix(&s[2 * idx..2 * idx + 2], 16).unwrap()); } + if s.len() % 2 == 1 { + data.push(u8::from_str_radix(&s[s.len() - 1..], 16).unwrap() << 4); + } + p.advance(); + Ok((data, bit_length, position)) } + _ => Err(ErrorSet::single( + position, + Error::ParseFailed(p.peek_raw_description()), + )), } +} - /// Creates an AST from a CMR literal lexeme - fn from_cmr_literal_lexeme(lexemes: &[&std::rc::Rc]) -> Self { - assert_eq!(lexemes.len(), 1); - assert_eq!(lexemes[0].raw.len(), 65); - - Ast::Cmr(AstCmr::Literal) - } - - fn expect_arrow(&mut self) -> (Option, Option) { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Arrow(a, b) = replaced { - (a, b) - } else { - panic!("Expected arrow, got {:?}", self); +/// Parse a type expression, left-associative for both + and * +fn parse_type(p: &mut Parser) -> Result, ErrorSet> { + let mut lhs = parse_type_atom(p)?; + loop { + if p.peek() == Some(&Token::Plus) { + p.advance(); + let rhs = parse_type_atom(p)?; + lhs = lhs + .zip(rhs) + .map(|(l, r)| Type::Sum(Box::new(l), Box::new(r))); + continue; } - } - - /// Checks that a given AST element is a dummy value - fn expect_position(&self) -> Position { - match self { - Ast::Combinator { position, .. } => *position, - Ast::Dummy { position } => *position, - Ast::Literal { position, .. } => *position, - Ast::Symbol { position, .. 
} => *position, - _ => panic!("Expected some element with position, got {:?}", self), + if p.peek() == Some(&Token::Star) { + p.advance(); + let rhs = parse_type_atom(p)?; + lhs = lhs + .zip(rhs) + .map(|(l, r)| Type::Product(Box::new(l), Box::new(r))); + continue; } + break; } + Ok(lhs) +} - fn expect_cmr(&mut self) -> AstCmr { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Cmr(cmr) = replaced { - cmr - } else { - panic!("Expected CMR, got {:?}", replaced); +/// Parse a type atom +fn parse_type_atom(p: &mut Parser) -> Result, ErrorSet> { + match p.peek().cloned() { + Some(Token::One) => { + p.advance(); + Ok(Some(Type::One)) } - } - - fn expect_expression(&mut self) -> Expression { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Expression(exp) = replaced { - exp - } else { - panic!("Expected expression, got {:?}", replaced); + Some(Token::Two) => { + p.advance(); + Ok(Some(Type::Two)) } - } - - fn expect_line(&mut self) -> Line { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Line(ell) = replaced { - ell - } else { - panic!("Expected line, got {:?}", replaced); + Some(Token::TwoExp(ref raw)) => { + let raw = raw.clone(); + let position = p.current_position(); + p.advance(); + let exp_str = &raw[2..]; + match str::parse::(exp_str) { + Ok(0) => Ok(Some(Type::One)), + Ok(1) => Ok(Some(Type::Two)), + Ok(2) => Ok(Some(Type::TwoTwoN(1))), + Ok(4) => Ok(Some(Type::TwoTwoN(2))), + Ok(8) => Ok(Some(Type::TwoTwoN(3))), + Ok(16) => Ok(Some(Type::TwoTwoN(4))), + Ok(32) => Ok(Some(Type::TwoTwoN(5))), + Ok(64) => Ok(Some(Type::TwoTwoN(6))), + Ok(128) => Ok(Some(Type::TwoTwoN(7))), + Ok(256) => Ok(Some(Type::TwoTwoN(8))), + Ok(512) => Ok(Some(Type::TwoTwoN(9))), + Ok(y) => Err(ErrorSet::single(position, Error::Bad2ExpNumber(y))), + Err(_) => Err(ErrorSet::single(position, Error::NumberOutOfRange(raw))), + } } - } - - fn expect_literal(&mut self) -> (Vec, usize, Position) { - let replaced = mem::replace(self, 
Ast::Replaced); - if let Ast::Literal { - data, - bit_length, - position, - } = replaced - { - (data, bit_length, position) - } else { - panic!("Expected literal, got {:?}", replaced); + Some(Token::LParen) => { + p.advance(); + let ty = parse_type(p)?; + p.expect(&Token::RParen)?; + Ok(ty) } - } - - fn expect_program(&mut self) -> Vec> { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Program(lines) = replaced { - lines - } else { - panic!("Expected program, got {:?}", replaced); + Some(Token::Symbol(_)) | Some(Token::Underscore) => { + let (name, _pos) = parse_symbol_value(p)?; + if name.as_ref() == "_" { + Ok(None) + } else { + Ok(Some(Type::Name(name.as_ref().to_owned()))) + } } + _ => Err(ErrorSet::single( + p.current_position(), + Error::ParseFailed(p.peek_raw_description()), + )), } +} - fn expect_symbol(&mut self) -> (Arc, Position) { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Symbol { value, position } = replaced { - (value, position) - } else { - panic!("Expected symbol, got {:?}", replaced); +/// Consume a token that represents a symbol name and return it +fn parse_symbol_value(p: &mut Parser) -> Result<(Arc, Position), ErrorSet> { + let position = p.current_position(); + match p.peek().cloned() { + Some(Token::Symbol(ref s)) => { + let s: Arc = Arc::from(s.as_str()); + p.advance(); + Ok((s, position)) } - } - - /// Checks that a given AST element is a type, and returns it if so - /// - /// Replaces the original value with a dummy, on the assumption that - /// it lives in a vector and therefore can't be simply moved. 
- fn expect_type(&mut self) -> Option { - let replaced = mem::replace(self, Ast::Replaced); - if let Ast::Type(ty) = replaced { - ty - } else { - panic!("Expected type, got {:?}", replaced); + Some(Token::Underscore) => { + p.advance(); + Ok((Arc::from("_"), position)) } + _ => Err(ErrorSet::single( + position, + Error::ParseFailed(p.peek_raw_description()), + )), } } -fn lexer_rules() -> LexerRules { - santiago::lexer_rules!( - // Base combinators and jets - "DEFAULT" | "CONST" = string "const"; - "DEFAULT" | "NULLARY" = pattern "unit|iden|witness|jet_[a-z0-9_]*"; - "DEFAULT" | "UNARY" = pattern "injl|injr|take|drop"; - "DEFAULT" | "BINARY" = pattern "case|comp|pair|disconnect"; - // Assertions - "DEFAULT" | "ASSERTL" = string "assertl"; - "DEFAULT" | "ASSERTR" = string "assertr"; - "DEFAULT" | "FAIL" = string "fail"; - // Literals - "DEFAULT" | "_" = string "_"; - "DEFAULT" | "LITERAL" = pattern r"0b[01]+|0x[0-9a-f]+"; - - // Symbols (expression names). Essentially any alphanumeric that does not - // start with a numbera and isn't a reserved symbol (i.e. one of the above) - // patterns. Dash, underscore and dot are also allowed anywhere in a symbol. - "DEFAULT" | "SYMBOL" = pattern r"[a-zA-Z_\-.'][0-9a-zA-Z_\-.']*"; - - // Type/arrow symbols - "DEFAULT" | "(" = string "("; - "DEFAULT" | ")" = string ")"; - "DEFAULT" | "+" = string "+"; - "DEFAULT" | "*" = string "*"; - "DEFAULT" | "->" = string "->"; - "DEFAULT" | ":" = string ":"; - "DEFAULT" | "1" = string "1"; - "DEFAULT" | "2" = string "2"; - "DEFAULT" | "2EXP" = pattern "2\\^[1-9][0-9]*"; - - // Assignment - "DEFAULT" | ":=" = string ":="; - - // CMR and holes - "DEFAULT" | "CMRLIT" = pattern "#[a-fA-F0-9]{64}"; - "DEFAULT" | "#{" = string "#{"; - "DEFAULT" | "}" = string "}"; - "DEFAULT" | "?" = string "?"; - - // Comments (single-line comments only). - "DEFAULT" | "LINE_COMMENT" = pattern r"--.*\n" => |lexer| lexer.skip(); - - // No whitespace is significant except to separate other tokens. 
- "DEFAULT" | "WS" = pattern r"\s" => |lexer| lexer.skip(); - ) -} - -fn grammar() -> Grammar> { - santiago::grammar!( - "program" => empty => |_| Ast::Program(vec![]); - "program" => rules "line" "program" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - let line = toks[0].expect_line(); - let prog = toks[1].expect_program(); - - let mut new_prog = Vec::with_capacity(1 + prog.len()); - new_prog.push(line); - new_prog.extend(prog); - Ast::Program(new_prog) - }; - - "line" => rules "symbol" ":" "arrow" => |mut toks| Ast::from_2( - &mut toks, - Ast::expect_symbol, - Ast::expect_arrow, - |symb, arrow| Ast::Line(Line { - position: symb.1, - name: symb.0, - expression: None, - arrow, - }) - ); - "line" => rules "symbol" ":=" "expr" => |mut toks| Ast::from_2( - &mut toks, - Ast::expect_symbol, - Ast::expect_expression, - |symb, expr| Ast::Line(Line { - position: symb.1, - name: symb.0, - expression: Some(expr), - arrow: (None, None), - }) - ); - "line" => rules "symbol" ":=" "expr" ":" "arrow" => |mut toks| Ast::from_3( - &mut toks, - Ast::expect_symbol, - Ast::expect_expression, - Ast::expect_arrow, - |symb, expr, arrow| Ast::Line(Line { - position: symb.1, - name: symb.0, - expression: Some(expr), - arrow, - }), - ); - - "arrow" => rules "type" "->" "type" => |mut toks| Ast::from_2( - &mut toks, - Ast::expect_type, - Ast::expect_type, - Ast::Arrow, - ); - - "expr" => rules "symbol" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 1); - if let Ast::Symbol { value, position } = mem::replace(&mut toks[0], Ast::Replaced) { - Ast::Expression(Expression::reference(value, position)) - } else { - unreachable!("expected string, got something else") - } - }; - "expr" => rules "?" 
"symbol" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 2); - if let Ast::Symbol { value, position } = mem::replace(&mut toks[1], Ast::Replaced) { - Ast::Expression(Expression { - inner: ExprInner::Inline(node::Inner::Witness(WitnessOrHole::TypedHole(value))), - position, - }) - } else { - unreachable!("expected string, got something else") - } - }; - "expr" => rules "(" "expr" ")" => |toks| toks[1].clone(); - "expr" => rules "nullary" => Ast::from_combinator; - "expr" => rules "unary" "expr" => Ast::from_combinator; - "expr" => rules "binary" "expr" "expr" => Ast::from_combinator; - - // TODO should we allow CMRs as literals for constant words? - "expr" => rules "const" "literal" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 2); - let (data, bit_length, position) = toks[1].expect_literal(); - let mut iter = BitIter::from(data); - - if bit_length.count_ones() != 1 || bit_length > 1 << 31 { - return Ast::Error(ErrorSet::single( - position, - Error::BadWordLength { bit_length }, - )); - } - // unwrap ok here since literally every sequence of bits is a valid - // value for the given type - let word = Word::from_bits(&mut iter, bit_length.trailing_zeros()).unwrap(); - Ast::Expression(Expression { - inner: ExprInner::Inline(node::Inner::Word(word)), - position, - }) - }; - - "expr" => rules "assertl" "expr" "cmr" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 3); - let exp1 = toks[1].expect_expression(); - let cmr2 = toks[2].expect_cmr(); - Ast::Expression(Expression { - inner: ExprInner::AssertL(Arc::new(exp1), cmr2), - position: toks[0].expect_position(), - }) - }; - "expr" => rules "assertr" "cmr" "expr" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 3); - let cmr1 = toks[1].expect_cmr(); - let exp2 = toks[2].expect_expression(); - Ast::Expression(Expression { - 
inner: ExprInner::AssertR(cmr1, Arc::new(exp2)), - position: toks[0].expect_position(), - }) - }; - - "expr" => rules "fail" "literal" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 2); - let (value, bit_length, position) = toks[1].expect_literal(); - if bit_length < 128 { - Ast::Error(ErrorSet::single( - position, - Error::EntropyInsufficient { bit_length }, - )) - } else if bit_length > 512 { - Ast::Error(ErrorSet::single( - position, - Error::EntropyTooMuch { bit_length }, - )) - } else { - let mut entropy = [0; 64]; - entropy[..value.len()].copy_from_slice(&value[..]); - let entropy = FailEntropy::from_byte_array(entropy); - Ast::Expression(Expression { - inner: ExprInner::Inline(node::Inner::Fail(entropy)), - position, - }) - } - }; - - "cmr" => rules "#{" "expr" "}" => |mut toks| { - if let Some(e) = propagate_errors(&toks) { return e; } - assert_eq!(toks.len(), 3); - let exp = toks[1].expect_expression(); - Ast::Cmr(AstCmr::Expr(Arc::new(exp))) - }; - "cmr" => rules "CMRLIT"; - - - "type" => rules "symbol" => |mut toks| Ast::from_1( - &mut toks, - Ast::expect_symbol, - |name| { - if name.0.as_ref() == "_" { - Ast::Type(None) - } else { - // Type names are stored as Strings, but we normally use Arc - // in the parser. So we need to do an extra conversion. 
- Ast::Type(Some(Type::Name(name.0.as_ref().to_owned()))) - } - }, - ); - "type" => rules "1"; - "type" => rules "2"; - "type" => rules "2EXP"; - "type" => rules "(" "type" ")" => |mut toks| Ast::from_1( - &mut toks[1..2], - Ast::expect_type, - Ast::Type, - ); - "type" => rules "type" "+" "type" => |mut toks| Ast::from_2( - &mut toks, - Ast::expect_type, - Ast::expect_type, - |t1, t2| Ast::Type(t1.zip(t2).map(|(t1, t2)| Type::Sum(Box::new(t1), Box::new(t2)))), - ); - "type" => rules "type" "*" "type" => |mut toks| Ast::from_2( - &mut toks, - Ast::expect_type, - Ast::expect_type, - |t1, t2| Ast::Type(t1.zip(t2).map(|(t1, t2)| Type::Product(Box::new(t1), Box::new(t2)))), - ); - - // Turn lexemes into rules - "nullary" => lexemes "NULLARY" => Ast::from_combinator_lexeme; - "unary" => lexemes "UNARY" => Ast::from_combinator_lexeme; - "binary" => lexemes "BINARY" => Ast::from_combinator_lexeme; - "const" => lexemes "CONST" => Ast::from_dummy_lexeme; - "assertl" => lexemes "ASSERTL" => Ast::from_combinator_lexeme; - "assertr" => lexemes "ASSERTR" => Ast::from_combinator_lexeme; - "fail" => lexemes "FAIL" => Ast::from_combinator_lexeme; - - "literal" => lexemes "LITERAL" => Ast::from_literal_lexeme; - "literal" => lexemes "_" => Ast::from_literal_lexeme; - - "#{" => lexemes "#{" => Ast::from_dummy_lexeme; - "}" => lexemes "}" => Ast::from_dummy_lexeme; - "CMRLIT" => lexemes "CMRLIT" => Ast::from_cmr_literal_lexeme; - - "symbol" => lexemes "_" => |lexemes| { - assert_eq!(lexemes.len(), 1); - Ast::Symbol { - value: Arc::from(lexemes[0].raw.as_str()), - position: (&lexemes[0].position).into(), - } - }; - "symbol" => lexemes "SYMBOL" => |lexemes| { - assert_eq!(lexemes.len(), 1); - Ast::Symbol { - value: Arc::from(lexemes[0].raw.as_str()), - position: (&lexemes[0].position).into(), - } - }; - - "(" => lexemes "(" => Ast::from_dummy_lexeme; - ")" => lexemes ")" => Ast::from_dummy_lexeme; - "+" => lexemes "+" => Ast::from_dummy_lexeme; - "*" => lexemes "*" => 
Ast::from_dummy_lexeme; - ":" => lexemes ":" => Ast::from_dummy_lexeme; - "->" => lexemes "->" => Ast::from_dummy_lexeme; - - "1" => lexemes "1" => Ast::from_type_lexeme; - "2" => lexemes "2" => Ast::from_type_lexeme; - "2EXP" => lexemes "2EXP" => Ast::from_type_lexeme; - - ":=" => lexemes ":=" => Ast::from_dummy_lexeme; - - "#" => lexemes "#" => Ast::from_dummy_lexeme; - "?" => lexemes "?" => Ast::from_dummy_lexeme; - - // Define associativity rules for type constructors - Associativity::Left => rules "+"; - Associativity::Left => rules "*"; - ) -} - #[cfg(test)] mod tests { use super::*;