simple spans work

1358f62f · Per Lindgren · cd6055e1 · 1358f62f · 1358f62f · 1358f62f
Commit 1358f62f authored 4 years ago by Per Lindgren
--- a/src/ast/README.md
+++ b/src/ast/README.md
-# Rust Syntax
+# Spans

-As it turns out Rust syntax is a bit tricky to parse.
+We may use spans to give location information for each token/matched item.

-Consider the following program (`examples/syntax.rs`):

-``` Rust
-fn main() {
-    let a = false; // trailing `;`
-
-    // extra `;` are allowed in between statements
-
-    // (1)
-    let a = {
-        let b = 1 + 1;
-
-        // (2)
-        if a {} // no trailing `;`
-        b // no trailing `;`, return value
-    };
-
-    // (3)
-    while false {
-        // do something here
-    } // no trailing `;`
-
-    // (4)
-    {
-        // local block/scope
-    }
-
-    // (5)
-    let _b = if a < 5 { 1 } else { 2 };
-}
-```
-
-The *body* of a function is a *block* (sequence of statements). The `let` statement accepts a *block* of statements as part of an assignment (1). Rust allows *blocks* to introduce local scopes (4). Rust also allows an assignment to refer to the result of an `if then else` construct (5).
-
-Inside a *block*, statements are typically separated by `;`, with the following exceptions. 
-
- `if` statements (2),
- `while` statements (3), and
- `{ ... }` inner blocks (4).
-
-Additionally Rust allows for additional `;` in between statements (but extra `;` are considered non-idiomatic and thus removed by `rustfmt`).
-
-Omitting trailing `;` for the last statement in a *block* renders an implicit return. This is allowed by the Rust compiler in case the statement can be interpreted as an expression.
-
-## An example grammar
-
-The example grammar in `ast/parser.lalrpop` covers a minimal subset of Rust, sufficient to parse the given `syntax.rs` example. Each action merely produces a unit result (no AST is built).
-
-Some interesting design decisions:
-
- A *block* of statements is a sequence of `;` separated statements followed by an optional trailing statement. `Block` accepts a sequence of statements `StmtSeq*` followed by an optional trailing `Stmt`. `StmtSeq` is either a `Stmt` `;`, or a `StmtBlock`, where the latter covers the case of `while`, `if` and `Block` (nesting/scopes) (without requiring `;` delimiting). We also see that `Stmt` accepts additional `;`. `Stmt` also accepts `ExprNoBlock` (which is essentially plain expressions, free of block constructs as further discussed below.)
-
-``` Rust
-Block: () = {
-    "{" StmtSeq* Stmt? "}",
-}
-
-StmtSeq: () = {
-    Stmt ";",
-    StmtBlock,
-}
-
-StmtBlock: () = {
-    "while" Expr Block,
-    "if" Expr Block ("else" Block)?,
-    Block,
-}
-
-Stmt: () = {
-    ";",
-    "let" "mut"? Id (":" Type)? "=" Expr,
-    ExprNoBlock "=" Expr,
-    ExprNoBlock,
-}
-```
-
- We treat statements that may be considered as expressions by a special rule `ExprBlock`, where we accept either `if then else` or a `block` (statements). (This is where we likely add `match` and similar statements later.)
-
-  Recall that a statement can be a return value, thus must somehow accept an expression. Now, here is the crux, since we want `if then else` and `block` (statements) to be treated as expressions for assignments, it would cause ambiguities between statements as part of an expression or inside a *block*. We can resolve this by adopting `ExprNoBlock` inside of `stmt`. `ExprNoBlock` accepts expressions other than those that are matched by `ExprBlock` (`if then else` and `block`).
-
- `;` is treated as a `stmt`, hence we accept *blocks* like `{; let a = 5;;;;; let b = 6;;; c}`. Notice, `;` carries no meaning besides for the optional trailing `;` of a *block* (determining the return type).
-
-``` Rust
-Expr: () = {
-    ExprBlock,
-    ExprNoBlock,    
-}
-
-ExprBlock: () = {
-    "if" ExprNoBlock Block "else" Block,
-    Block,
-}
-
-// Lowest Precedence
-ExprNoBlock = Tier2<AndOrOp, AndOr>; 
-AndOr = Tier2<ComparisonOp, AddSub>;
-AddSub = Tier2<AddSubOp, MulDiv>;
-MulDiv = Tier2<MulDivOp, Unary>;
-Unary = Tier1<UnaryOp, Term>;
-
-// Highest Precedence
-Term: () = {
-    Id,
-    Num,
-    Id "(" CommaNoTrail<Expr> ")",    
-    "(" Expr ")",
-}
-...
-```
-
- `Expr` accepts both `ExprBlock` (statements with return value), and plain expressions (`ExprNoBlock`).
-
- Precedences go from low to high, with `Term` having the highest precedence (matched first in a bottom up (LR) parser).
-
-## Reflection on the Rust syntax
-
-The Rust syntax seems somewhat arbitrarily chosen. The requirement that `let` statements must be trailed by `;` is, to the best of my understanding, not required for soundness (the `let` could have been given a `Unit` type, similar to an assignment). This leads me to believe that the trailing `;` is rather an enforcement of style.

--- a/src/ast/main.rs
+++ b/src/ast/main.rs
-use std::fs::File;
-use std::io::prelude::*;
-
 use lalrpop_util::lalrpop_mod;

 lalrpop_mod!(pub parser, "/ast/parser.rs");
@@ -11,34 +8,7 @@ pub mod ast;

 fn main() {}

-pub fn read(file_name: &str) -> std::io::Result<String> {
-    let mut file = File::open(file_name)?;
-    let mut contents = String::new();
-    file.read_to_string(&mut contents)?;
-    Ok(contents)
-}
-
-pub fn parse(file_name: &str) {
-    let p = read(file_name).expect("File not found");
-    ProgramParser::new().parse(&p).unwrap()
-}
-
-#[test]
-fn syntax() {
-    parse("examples/syntax.rs");
-}
-
-#[test]
-fn syntax2() {
-    parse("examples/syntax2.rs");
-}
-
-#[test]
-fn syntax3() {
-    parse("examples/syntax3.rs");
-}
-
 #[test]
-fn syntax4() {
-    parse("examples/syntax4.rs");
+fn loc() {
+    println!("{:?}", NumSeqParser::new().parse("1, 2").unwrap());
 }
--- a/src/ast/parser.lalrpop
+++ b/src/ast/parser.lalrpop
@@ -4,6 +4,18 @@ use crate::ast::*;

 grammar;

+// pub Items: Vec<(usize, usize)> = {
+//     <@L> <@R> => vec![(<>)],
+
+//     <mut v:Items> <e:Spanned<"+">> => { v.push(e); v },
+
+//     <v:Items> "-" => v
+// };
+
+// Spanned<T>: (usize, usize) = {
+//     <@L> T <@R> => (<>)
+// };
+
 match {
    // The default whitespace skipping is disabled when an `ignore pattern` is specified
    r"\s*" => { }, 
@@ -14,124 +26,21 @@ match {
    _
 }

+Spanned<T>: (usize, usize, T) = {
+    <l:@L> <t:T> <r:@R>  => (l, r, t)
+};
+
 // A comma separated sequence without trailing comma
 CommaNoTrail<T>: Vec<T> = { 
    <mut v:(<T> ",")*> <e:T> => { v.push(e); v }
 }

-Tier2<Op, NextTier>: () = {
-    Tier2<Op, NextTier> Op NextTier,
-    NextTier
-};
-
-Tier1<Op, NextTier>: () = {
-    Op NextTier,
-    NextTier
-};
-
-pub Program: () = {
-    Function*
-}
-
-Function: () = {
-    "fn" Id Params ("->" Type)? Block,
-}
-
-Params: () = {
-    "()", // seems like a haxx
-    "(" (Param ",")* Param? ")",
-}
-
-Param:() = {
-    "mut"? Id ":" Type,
-}
-
-Type:() = {
-    "i32",
-    "bool",
-    "()",
-    "!",
-}
-
-Block: () = {
-    "{" StmtSeq* Stmt? "}",
-}
-
-StmtSeq: () = {
-    Stmt ";",
-    StmtBlock,
-}
-
-StmtBlock: () = {
-    "while" Expr Block,
-    "if" Expr Block ("else" Block)?,
-    Block,
-}
-
-Stmt: () = {
-    ";",
-    "let" "mut"? Id (":" Type)? "=" Expr,
-    ExprNoBlock "=" Expr,
-    ExprNoBlock,
-}
-
-
-Expr: () = {
-    ExprBlock,
-    ExprNoBlock,    
-}
-
-ExprBlock: () = {
-    "if" ExprNoBlock Block "else" Block,
-    Block,
-}
-
-// Lowest Precedence
-ExprNoBlock = Tier2<AndOrOp, AndOr>; 
-AndOr = Tier2<ComparisonOp, AddSub>;
-AddSub = Tier2<AddSubOp, MulDiv>;
-MulDiv = Tier2<MulDivOp, Unary>;
-Unary = Tier1<UnaryOp, Term>;
-
-// Highest Precedence
-Term: () = {
-    Id,
-    Num,
-    Id "(" CommaNoTrail<Expr> ")",    
-    "(" Expr ")",
-}
-
-AndOrOp: () = {
-    "||",
-    "&&",
-}
-
-ComparisonOp: () = {
-    "==",
-    "!=", 
-    ">", 
-    "<",
-}
-
-AddSubOp: () = {
-    "+",
-    "-", 
-}
-
-MulDivOp: () = {
-    "/",
-    "*", 
-}
-
-UnaryOp: () = {
-    "!",
-    "*",
-    "&",
-    "&" "mut", 
+pub NumSeq: Vec<(usize, usize, i32)> = {
+    CommaNoTrail<Num> 
 } 

-Num: i32 = {
-    r"[0-9]+" => i32::from_str(<>).unwrap(),
+pub Num: (usize, usize, i32) = {
+    <l: @L> <n: r"[0-9]+"> <r: @R> => (l, r, i32::from_str(n).unwrap()),
 };

 Id: String = {