diff --git a/src/ast/README.md b/src/ast/README.md index b0cbc618de117fc23fc952893b9bd76224145bbd..0bfe52cc163e2119bf218dd6d6249e2100c8a3b5 100644 --- a/src/ast/README.md +++ b/src/ast/README.md @@ -1,122 +1,6 @@ -# Rust Syntax +# Spans -As it turns out Rust syntax is a bit tricky to parse. +We may use spans to give location information for each token/matched item. -Consider the following program (`examples/syntax.rs`): -``` Rust -fn main() { - let a = false; // trailing `;` - - // extra `;` are allowed in between statements - - // (1) - let a = { - let b = 1 + 1; - - // (2) - if a {} // no trialing `;` - b // no trialing `;`, return value - }; - - // (3) - while false { - // do something here - } // no trailing `;` - - // (4) - { - // local block/scope - } - - // (5) - let _b = if a < 5 { 1 } else { 2 }; -} -``` - -The *body* of a function is a *block* (sequence of statements). The `let` statement accepts a *block* of statements as part of an assignment (1). Rust allows *blocks* to introduce local scopes (4). Rust allows allow assignment to refer to the result of an `if then else` construct (5). - -Inside a *block*, statements are typically separated by `;`, with the following exceptions. - -- `if` statements (2), -- `while` statements (3), and -- `{ ... }` inner blocks (4). - -Additionally Rust allows for additional `;` in between statements (but extra `;` are considered non-idiomatic and thus removed by `rustfmt`). - -Omitting trailing `;` for the last statement in a *block* renders an implicit return. This is allowed by the Rust compiler in case the statement can be interpreted as an expression. - -## An example grammar - -The example grammar in `ast/parser.lalrpop` covers a minimal subset of Rust, sufficient to parse the given `syntax.rs` example. Each action merely produces a unit result (no AST is built). - -Some interesting design decisions: - -- A *block* of statements is sequence of `;` separated statements followed by an optional trailing statement. `Block` accepts a sequence of statements `StmtSeq*` followed by an optional trailing `Stmt`. `StmtSeq` is either a `Stmt` `;`, or a `StmtBlock`, where the latter cover the case of `while`, `if` and `Block` (nesting/scopes) (without requiring `;` delimiting). We also see that `Stmt` accepts additional `;`. `Stmt` also accepts `ExprNoBlock` (which is essentially plain expressions, free of block constructs as further discussed below.) - -``` Rust -Block: () = { - "{" StmtSeq* Stmt? "}", -} - -StmtSeq: () = { - Stmt ";", - StmtBlock, -} - -StmtBlock: () = { - "while" Expr Block, - "if" Expr Block ("else" Block)?, - Block, -} - -Stmt: () = { - ";", - "let" "mut"? Id (":" Type)? "=" Expr, - ExprNoBlock "=" Expr, - ExprNoBlock, -} -``` - -- We treat statements that may be considered as expressions by a special rule `ExprBlock`, where we accept either `if then else` or a `block` (statments). (This is where we likely add `match` and similar statements later.) - - Recall that a statement can be a return value, thus must somehow accept an expression. Now, here is the crux, since we want `if then else` and `block` (statements) to be treated as expression for assignments, it would cause ambiguities between statements as part of an expression or inside a *block*. We can resolve this by the adopting `ExprNoBlock`, inside of `stmt`. `ExprNoBlock` accepts expression besides those that are matched by `ExprBlock` (`if then else` and `block`). - -- `;` is treated as a `stmt`, hence we accept *blocks* like `{; let a = 5;;;;; let b = 6;;; c}`. Notice, `;` carries no meaning besides for the optional trailing `;` of a *block* (determining the return type). - -``` Rust -Expr: () = { - ExprBlock, - ExprNoBlock, -} - -ExprBlock: () = { - "if" ExprNoBlock Block "else" Block, - Block, -} - -// Lowest Precedence -ExprNoBlock = Tier2<AndOrOp, AndOr>; -AndOr = Tier2<ComparisonOp, AddSub>; -AddSub = Tier2<AddSubOp, MulDiv>; -MulDiv = Tier2<MulDivOp, Unary>; -Unary = Tier1<UnaryOp, Term>; - -// Highest Precedence -Term: () = { - Id, - Num, - Id "(" CommaNoTrail<Expr> ")", - "(" Expr ")", -} -... -``` - -- `Expr` accepts both `ExprBlock` (statements with return value), and plain plain expressions (`ExprNoBlock`). - -- Precedences go from low to high, with `Term` having the highest precedence (matched first in a bottom up (LR) parser). - -## Reflection on the Rust syntax - -The Rust syntax seems somewhat arbitrarily chosen. The requirement that `let` statements must be trailed by `;`, is to my best understanding not required for soundness (the `let` could have been given a `Unit` type, similar to an assignment). This leads me to believe that the trailing `;` is rather an enforcement of style. diff --git a/src/ast/main.rs b/src/ast/main.rs index cd28e1165e3ed5ac7af077be6798307777e33664..2382591ad3292d33cdfc587ab11f51d04b6f5f78 100644 --- a/src/ast/main.rs +++ b/src/ast/main.rs @@ -1,6 +1,3 @@ -use std::fs::File; -use std::io::prelude::*; - use lalrpop_util::lalrpop_mod; lalrpop_mod!(pub parser, "/ast/parser.rs"); @@ -11,34 +8,7 @@ pub mod ast; fn main() {} -pub fn read(file_name: &str) -> std::io::Result<String> { - let mut file = File::open(file_name)?; - let mut contents = String::new(); - file.read_to_string(&mut contents)?; - Ok(contents) -} - -pub fn parse(file_name: &str) { - let p = read(file_name).expect("File not found"); - ProgramParser::new().parse(&p).unwrap() -} - -#[test] -fn syntax() { - parse("examples/syntax.rs"); -} - -#[test] -fn syntax2() { - parse("examples/syntax2.rs"); -} - -#[test] -fn syntax3() { - parse("examples/syntax3.rs"); -} - #[test] -fn syntax4() { - parse("examples/syntax4.rs"); +fn loc() { + println!("{:?}", NumSeqParser::new().parse("1, 2").unwrap()); } diff --git a/src/ast/parser.lalrpop b/src/ast/parser.lalrpop index cdbf819db1dedbe03093f203b5c485ce214699e0..539b88bb96a72b8af6473b564d0c0bcdde311266 100644 --- a/src/ast/parser.lalrpop +++ b/src/ast/parser.lalrpop @@ -4,6 +4,18 @@ use crate::ast::*; grammar; +// pub Items: Vec<(usize, usize)> = { +// <@L> <@R> => vec![(<>)], + +// <mut v:Items> <e:Spanned<"+">> => { v.push(e); v }, + +// <v:Items> "-" => v +// }; + +// Spanned<T>: (usize, usize) = { +// <@L> T <@R> => (<>) +// }; + match { // The default whitespace skipping is disabled an `ignore pattern` is specified r"\s*" => { }, @@ -14,124 +26,21 @@ match { _ } +Spanned<T>: (usize, usize, T) = { + <l:@L> <t:T> <r:@R> => (l, r, t) +}; + // A comma separated sequence without trailing comma CommaNoTrail<T>: Vec<T> = { <mut v:(<T> ",")*> <e:T> => { v.push(e); v } } -Tier2<Op, NextTier>: () = { - Tier2<Op, NextTier> Op NextTier, - NextTier -}; - -Tier1<Op, NextTier>: () = { - Op NextTier, - NextTier -}; - -pub Program: () = { - Function* -} - -Function: () = { - "fn" Id Params ("->" Type)? Block, -} - -Params: () = { - "()", // seems like a haxx - "(" (Param ",")* Param? ")", -} - -Param:() = { - "mut"? Id ":" Type, -} - -Type:() = { - "i32", - "bool", - "()", - "!", -} - -Block: () = { - "{" StmtSeq* Stmt? "}", -} - -StmtSeq: () = { - Stmt ";", - StmtBlock, -} - -StmtBlock: () = { - "while" Expr Block, - "if" Expr Block ("else" Block)?, - Block, -} - -Stmt: () = { - ";", - "let" "mut"? Id (":" Type)? "=" Expr, - ExprNoBlock "=" Expr, - ExprNoBlock, -} - - -Expr: () = { - ExprBlock, - ExprNoBlock, -} - -ExprBlock: () = { - "if" ExprNoBlock Block "else" Block, - Block, -} - -// Lowest Precedence -ExprNoBlock = Tier2<AndOrOp, AndOr>; -AndOr = Tier2<ComparisonOp, AddSub>; -AddSub = Tier2<AddSubOp, MulDiv>; -MulDiv = Tier2<MulDivOp, Unary>; -Unary = Tier1<UnaryOp, Term>; - -// Highest Precedence -Term: () = { - Id, - Num, - Id "(" CommaNoTrail<Expr> ")", - "(" Expr ")", -} - -AndOrOp: () = { - "||", - "&&", -} - -ComparisonOp: () = { - "==", - "!=", - ">", - "<", -} - -AddSubOp: () = { - "+", - "-", -} - -MulDivOp: () = { - "/", - "*", -} - -UnaryOp: () = { - "!", - "*", - "&", - "&" "mut", -} +pub NumSeq: Vec<(usize, usize, i32)> = { + CommaNoTrail<Num> +} -Num: i32 = { - r"[0-9]+" => i32::from_str(<>).unwrap(), +pub Num: (usize, usize, i32) = { + <l: @L> <n: r"[0-9]+"> <r: @R> => (l, r, i32::from_str(n).unwrap()), }; Id: String = {