Compare revisions

Per · Per Lindgren · Per Lindgren · Per · Per · Per
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -62,6 +62,18 @@
                "kind": "build",
                "isDefault": true
            }
+        },
+        {
+            "type": "shell",
+            "label": "cargo run --example crust",
+            "command": "cargo run --example crust",
+            "problemMatcher": [
+                "$rustc"
+            ],
+            "group": {
+                "kind": "build",
+                "isDefault": true
+            }
        }
    ]
 }
\ No newline at end of file
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,3 +9,4 @@ edition = "2018"
 [dependencies]
 nom = "5.0.1"
 nom_locate = "1.0.0"
+inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "llvm8-0" }
--- a/HOME_EXAM.md
+++ b/HOME_EXAM.md
+- [Your repo](#your-repo)
+- [Your syntax](#your-syntax)
+- [Your semantics](#your-semantics)
+- [Your type checker](#your-type-checker)
+- [Your borrrow checker](#your-borrrow-checker)
+- [Your LLVM backend](#your-llvm-backend)
+- [Overal course goals and learning outcomes.](#overal-course-goals-and-learning-outcomes)
+
+## Your repo
+
+- Link to your repo here: https://github.com/dynematic/d7050e
+
+## Your syntax
+
+- Give an as complete as possible EBNF grammar for your language
+
+- Give an example that showcases all rules of your EBNF. The program should "do" something as used in the next exercise.
+
+- If you support pointers, make sure your example covers pointers as well.
+
+- Compare your solution to the requirements (as stated in the README.md). What are your contributions to the implementation.
+
+Program
+```ebnf
+    : FunctionDec+
+    ;
+```
+FunctionDec
+```ebnf
+    : "fn" Id "(" [ Params { "," Params } ] ")" "->" Type "{" Statement "}"
+    | "fn" Id "(" [ Params { "," Params } ] ")" "{" Statement "}"
+    ;
+```
+Params
+```ebnf
+    : Id ":" Type
+    ;
+```
+Statement
+```ebnf
+    : "let" Id ":" Type AssignOp Expr ";"
+    | "if" Log "{" Statement "}"
+    | "while" Log "{" Statement "}"
+    | "return" Expr ";"
+    | Expr ";"
+    ;
+```
+LogOp
+```ebnf
+    : "&&"
+    | "||"
+    | "!"
+    ;
+```
+CondOp
+```ebnf
+    : ">"
+    | "<"
+    | "=="
+    | "!="
+    ;
+```
+AssignOp
+```ebnf
+    : "="
+    | "+="
+    | "-="
+    | "/="
+    | "*="
+    ;
+```
+SumOp
+```ebnf
+    : "+"
+    | "-"
+    ;
+```
+FactorOp
+```ebnf
+    : "*"
+    | "/"
+    ;
+```
+Type
+```ebnf
+    : "i32"
+    | "bool"
+    ;
+```
+Term
+```ebnf
+    : Num
+    | Id
+    | Bool
+    | Function
+    | "(" Expr ")"
+    ;
+```
+Num
+```ebnf
+    : [ "-" ], Digit, { Digit }
+    ;
+```
+
+Id
+```ebnf
+    : Letter , { Letter | Digit }, - White_space
+```
+Bool
+```ebnf
+    : "true"
+    | "false"
+    ;
+```
+Function
+```ebnf
+    : Id "(" [ Expr { "," Expr } ] ")"
+    ;
+```
+Letter
+```ebnf
+    : "A" | "B" | "C" | "D" | "E" | "F" | "G"
+    | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+    | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+    | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
+    | "c" | "d" | "e" | "f" | "g" | "h" | "i"
+    | "j" | "k" | "l" | "m" | "n" | "o" | "p"
+    | "q" | "r" | "s" | "t" | "u" | "v" | "w"
+    | "x" | "y" | "z" 
+    ;
+```
+Digit
+```ebnf
+    : "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 
+    ;
+```
+White_space
+```ebnf
+    : ? White_space characters ? 
+    ;
+```
+
+Showcase
+```rust
+fn test_i32(b: i32, c: i32) -> i32 {
+    let a : i32 = 10 + 2 * 3;
+    a = -1 - (1 - 1);
+    return a + b;
+}
+fn test_bool(b: bool) -> bool {
+    if b && true || false {
+        let a : i32 = test_i32(1, 999);
+        while (a < 5) {
+            a = a + 1;
+        }
+        return a;
+    }
+}
+
+fn main() {
+    test_bool(true);
+}
+```
+The EBNF and showcase described above define a minimal subset of Rust, including
+- Function declaration, with explicit return type either void (NONE) or primitive type
+- Primitive types are i32 and boolean
+- let, assignment, if, while and return statements with explicit types
+- Operators are divided into logical, conditional and arithmetic operators with precedence
+- Parenthesized precedence, and FactorOp has precedence over SumOp
+- Assignment operators are separated from the operators mentioned above
+- Location information for Error is not implemented
+- Error recovery is not implemented
+
+For future development
+- Rethink how assignment operators are handled in grammar
+- Add if else statement
+- Add support for String in Term
+- Error handling, location information and recovery
+
+## Your semantics
+
+- Give an as complete as possible Structural Operational Semantics (SOS) for your language
+
+- Explain (in text) what an interpretation of your example should produce, do that by dry running your given example step by step. Relate back to the SOS rules. You may skip repetitions to avoid cluttering.
+
+- Compare your solution to the requirements (as stated in the README.md). What are your contributions to the implementation.
+
+Structural Operational Semantics (SOS)
+Symbols:
+- σ, state
+- σ', derived state
+- ⇓, evaluates
+- c, command
+- x, variable
+- e, expression
+- b, boolean expression
+
+Command sequence
+
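+$\frac{\langle c_1, \sigma\rangle \Downarrow \sigma' \quad \langle c_2, \sigma'\rangle \Downarrow \sigma''}{\langle c_1; c_2, \sigma\rangle \Downarrow \sigma''}$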
+
+Arithmetic
+
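+$\frac{\langle e_1, \sigma\rangle \Downarrow n_1 \quad \langle e_2, \sigma\rangle \Downarrow n_2}{\langle e_1 + e_2, \sigma\rangle \Downarrow n_1 \text{ plus } n_2}$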
+
+If true
+
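+$\frac{\langle b, \sigma\rangle \Downarrow \text{true} \quad \langle c_1, \sigma\rangle \Downarrow \sigma'}{\langle \text{if } b \text{ then } c_1, \sigma\rangle \Downarrow \sigma'}$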
+
+If false
+
+$\frac{\langle b, \sigma\rangle \Downarrow \text{false}}{\langle \text{if } b \text{ then } c_1, \sigma\rangle \Downarrow \sigma}$
+
+
+
+
+## Your type checker
+
+- Give an as complete as possible set of Type Checking Rules for your language (those rules look very much like the SOS rules, but over types not values).
+
+- Demonstrate each "type rule" by an example.
+
+- Compare your solution to the requirements (as stated in the README.md). What are your contributions to the implementation.
+
+## Your borrrow checker
+
+- Give a specification for well versus ill formed borrows. (What are the rules the borrow checker should check).
+
+- Demonstrate the cases of ill formed borrows that your borrow checker is able to detect and reject.
+
+- Compare your solution to the requirements (as stated in the README.md). What are your contributions to the implementation.
+
+## Your LLVM backend
+
+- Let your backend produce LLVM-IR for your example program.
+
+- Describe where and why you introduced allocations and phi nodes.
+
+- If you have added optimization passes and/or attributed your code for better optimization (using e.g., `noalias`), describe them.
+
+- Compare your solution to the requirements (as stated in the README.md). What are your contributions to the implementation.
+
+## Overal course goals and learning outcomes.
+
+Comment on the alignment of the concrete course goals (taken from the course description) to the theory presented, work You have done and knowledge You have gained. (I have put some comments in [...]).
+
+- Lexical analysis, syntax analysis, and translation into abstract syntax.
+
+- Regular expressions and grammars, context-free languages and grammars, lexer and parser generators. [Nom is lexer/parser library (and replaces the need for a generator, while lalr-pop is a classical parser generator)]
+
+- Identifier handling and symbol table organization. Type-checking, logical inference systems. [SOS is a logical inference system]
+
+- Intermediate representations and transformations for different languages. [LLVM is a cross language compiler infrastructure]
+
+- Code optimization and register allocation. Machine code generation for common architectures. [LLVM is a cross target compiler infrastructure, doing the "dirty work" of optimization/register allocation leveraging the SSA form of the LLVM-IR]
+
+Comment on additional things that you have experienced and learned throughout the course.
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ The repo will be updated througout the course and includes a draft outline of th

 Fundamental theories about computation and different models of computation. Construction of compilers. Lexical analysis, syntax analysis, and translation into abstract syntax. Regular expressions and grammars, context-free languages and grammars, lexer and parser generators. Identifier handling and symbol table organization. Type-checking, logical inference systems. Intermediate representations and transformations for different languages. Code optimization and register allocation. Machine code generation for common architectures.

-In the course you will learn and develop your skills through hands on implementation work building your own complier from scratch. In this way theoretical aspects such as formal grammars, Structural Operational Semantics (SOS), and type rule formalisations becomes tangible. We will even touch upon memory safety and how guarantees can be achieved through static (compilet time) borrow checking. Compiler backend (code optimization etc.) will be discussed in context of LLVM, which you will optionally interface as a library for code generation.
+In the course you will learn and develop your skills through hands-on implementation work building your own compiler from scratch. In this way theoretical aspects such as formal grammars, Structural Operational Semantics (SOS), and type rule formalisations become tangible. We will even touch upon memory safety and how guarantees can be achieved through static (compile time) borrow checking. Compiler backend (code optimization etc.) will be discussed in context of LLVM, which you will optionally interface as a library for code generation.

 ## Draft outline


--- a/examples/climb.rs
+++ b/examples/climb.rs
@@ -149,9 +149,7 @@ fn main() {
 }

 // helpers
-fn parse_par<'a, O, F, E>(
-    inner: F,
-) -> impl Fn(&'a str) -> IResult<&'a str, O, E>
+fn parse_par<'a, O, F, E>(inner: F) -> impl Fn(&'a str) -> IResult<&'a str, O, E>
 where
    F: Fn(&'a str) -> IResult<&'a str, O, E>,
    E: ParseError<&'a str>,

--- a/examples/crust.rs
+++ b/examples/crust.rs
--- a/examples/llvm-jit.rs
+++ b/examples/llvm-jit.rs
+extern crate inkwell;
+
+use inkwell::builder::Builder;
+use inkwell::context::Context;
+use inkwell::execution_engine::{ExecutionEngine, JitFunction};
+use inkwell::module::Module;
+use inkwell::OptimizationLevel;
+
+use std::error::Error;
+
+/// Convenience type alias for the `sum` function.
+///
+/// Calling this is innately `unsafe` because there's no guarantee it doesn't
+/// do `unsafe` operations internally.
+type SumFunc = unsafe extern "C" fn(u64, u64, u64) -> u64;
+
+/// Emits an LLVM function named `sum` that adds its three 64-bit integer
+/// parameters, then looks the compiled function up in `execution_engine`.
+///
+/// Returns `None` if any of the three parameters cannot be fetched or if the
+/// execution-engine lookup fails.
+fn jit_compile_sum(
+    context: &Context,
+    module: &Module,
+    builder: &Builder,
+    execution_engine: &ExecutionEngine,
+) -> Option<JitFunction<SumFunc>> {
+    // Signature: three i64 parameters, i64 result, not variadic.
+    let int_ty = context.i64_type();
+    let signature = int_ty.fn_type(&[int_ty.into(), int_ty.into(), int_ty.into()], false);
+
+    // Declare the function and place the builder at the end of its entry block.
+    let sum_fn = module.add_function("sum", signature, None);
+    let entry = context.append_basic_block(&sum_fn, "entry");
+    builder.position_at_end(&entry);
+
+    let first = sum_fn.get_nth_param(0)?.into_int_value();
+    let second = sum_fn.get_nth_param(1)?.into_int_value();
+    let third = sum_fn.get_nth_param(2)?.into_int_value();
+
+    // (first + second) + third
+    let partial = builder.build_int_add(first, second, "sum");
+    let total = builder.build_int_add(partial, third, "sum");
+    builder.build_return(Some(&total));
+
+    // The lookup is `unsafe` in the execution-engine API; a failed lookup becomes `None`.
+    let lookup = unsafe { execution_engine.get_function("sum") };
+    lookup.ok()
+}
+
+/// Sets up the context, module, builder and JIT execution engine, compiles
+/// `sum`, then calls it with 1, 2 and 3, printing the result and asserting
+/// that it equals their sum.
+fn main() -> Result<(), Box<dyn Error>> {
+    let ctx = Context::create();
+    let module = ctx.create_module("sum");
+    let builder = ctx.create_builder();
+    let engine = module.create_jit_execution_engine(OptimizationLevel::None)?;
+
+    let sum = match jit_compile_sum(&ctx, &module, &builder, &engine) {
+        Some(compiled) => compiled,
+        None => return Err("Unable to JIT compile `sum`".into()),
+    };
+
+    let (x, y, z) = (1u64, 2u64, 3u64);
+
+    // `SumFunc` is an `unsafe extern "C"` function, so every call needs `unsafe`.
+    let printed = unsafe { sum.call(x, y, z) };
+    println!("{} + {} + {} = {}", x, y, z, printed);
+
+    let checked = unsafe { sum.call(x, y, z) };
+    assert_eq!(checked, x + y + z);
+
+    Ok(())
+}
--- a/examples/main_locate.rs
+++ b/examples/main_locate.rs
@@ -81,7 +81,6 @@ fn test_parse_i32_1() {
    );
 }

-
 fn main() {
    let (a, b) = parse_expr(Span::new("1")).unwrap();
    println!("{:?}", parse_expr(Span::new("1")));

--- a/examples/main_span_expr_custom_err.rs
+++ b/examples/main_span_expr_custom_err.rs
@@ -61,7 +61,11 @@ pub fn parse_i32<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> {
    let (i, digits) = digit1(i)?;
    match digits.fragment.parse() {
        Ok(int) => Ok((i, (digits, Expr::Num(int)))),
-        Err(e) => Err(Err::Failure(Error(i, Some(digits), ErrorKind::ParseIntError(e)))),
+        Err(e) => Err(Err::Failure(Error(
+            i,
+            Some(digits),
+            ErrorKind::ParseIntError(e),
+        ))),
    }
 }


--- a/examples/tmp.rs
+++ b/examples/tmp.rs
-extern crate nom;
+use crust::{
+    ast::Span,
+    interpreter::eval_expr,
+    parse::{parse_assign, parse_expr, parse_prog},
+};

+/// Parses `s` as an expression and, when the whole input was consumed,
+/// evaluates it and asserts that the result equals `v`.
+///
+/// A parse error or leftover input is only printed; it does not fail.
+fn test(s: &str, v: i32) {
+    let (rest, parsed) = match parse_expr(Span::new(s)) {
+        Ok(pair) => pair,
+        Err(err) => {
+            println!("{:?}", err);
+            return;
+        }
+    };
+
+    // Anything left over means the parser stopped early.
+    if !rest.fragment.is_empty() {
+        println!(
+            "parse incomplete, \n parsed tokens \t{:?}, \n remaining \t{:?}",
+            parsed, rest
+        );
+        return;
+    }
+
+    println!("{:?}", &parsed);
+    let value = eval_expr(&parsed);
+    println!("eval {} {}", value, v);
+    assert_eq!(value, v);
+}

-use crust::parse::test;
+/// Runs the one expression check that is currently enabled; the remaining
+/// cases are commented out in the body.
+fn test_expr() {
+    // test("- -1 + + 1", - -1 + 1);  // rust does not allow + as a unary op (I do ;)
+    // test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3);
+    // // just to check that right associative works (you don't need to implement pow)
+    // test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1);
+    // test("(12*2)/3-4", (12 * 2) / 3 - 4);
+    // test("1*2+3", 1 * 2 + 3);
+    // // just to check that we get a parse error
+    // test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2);
+    let source = "1 + (1 - 2)";
+    let expected = 1 + (1 - 2);
+    test(source, expected);
+}

 fn main() {
-    test("- -1 + + 1", - -1 + 1);  // rust does not allow + as a unary op (I do ;)
-    test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3);
-    // just to check that right associative works (you don't need to implement pow)
-    test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1);
-    test("(12*2)/3-4", (12 * 2) / 3 - 4);
-    test("1*2+3", 1 * 2 + 3);
-    // just to check that we get a parse error
-    test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2);
+    // println!("{:?}", parse_assign(Span::new("3 = a(1, 2+3)")));
+    println!(
+        "{:?}",
+        parse_prog(Span::new(
+            "
+      fn main() { let a:i32 = 1} "
+        ))
+    );
 }
-
-
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -20,14 +20,67 @@ pub enum Op {

 type SpanOp<'a> = (Span<'a>, Op);

+pub type SpanId<'a> = Span<'a>;
+
 #[derive(Debug, Clone, PartialEq)]
 pub enum Expr<'a> {
    Num(i32),
+    Bool(bool),
    Par(Box<SpanExpr<'a>>),
-    // Identifier
-    // Function application
+    Id(String),
+    Call(String, Vec<SpanExpr<'a>>),
    BinOp(Op, Box<SpanExpr<'a>>, Box<SpanExpr<'a>>),
    UnaryOp(Op, Box<SpanExpr<'a>>),
 }

 pub type SpanExpr<'a> = (Span<'a>, Expr<'a>);
+
+/// A command (statement) in the abstract syntax tree.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Cmd<'a> {
+    /// `let <mut> id : <& <mut>>Type = expr`:
+    /// mutability, identifier span, declared type and initialiser expression.
+    Let(Mutability, SpanId<'a>, Type, SpanExpr<'a>),
+    /// `id = expr`: the assignment target followed by the assigned expression.
+    Assign(SpanExpr<'a>, SpanExpr<'a>),
+    /// `if predicate do-this`, and optionally `else do-that`:
+    /// predicate, then-block and optional else-block.
+    If(SpanExpr<'a>, Block<'a>, Option<Block<'a>>),
+    /// `while predicate do-this`: predicate and loop body.
+    While(SpanExpr<'a>, Block<'a>),
+    /// A return carrying the expression to return.
+    Return(SpanExpr<'a>),
+}
+
+pub type SpanCmd<'a> = (Span<'a>, Cmd<'a>);
+
+/// Mutability marker carried by `Cmd::Let`.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Mutability {
+    /// Presumably a binding declared without `mut` (TODO confirm against the parser).
+    Imm,
+    /// Presumably a binding declared with `mut` (TODO confirm against the parser).
+    Mut,
+}
+pub type SpanMut<'a> = (Span<'a>, Mutability);
+
+pub type SpanBlock<'a> = (Span<'a>, Vec<SpanCmd<'a>>);
+pub type Block<'a> = Vec<Cmd<'a>>;
+
+/// The types that can appear in declarations and function signatures.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Type {
+    /// The `i32` integer type.
+    I32,
+    /// The `bool` type.
+    Bool,
+    /// Presumably the type of a function without a declared return type (TODO confirm).
+    Unit,
+    /// Wraps another type; presumably marks it as mutable (TODO confirm).
+    Mut(Box<Type>),
+    /// Wraps another type; presumably a reference to it (TODO confirm).
+    Ref(Box<Type>),
+    // no structs
+}
+
+// pub type SpanType<'a> = (Span<'a>, Type<'a>);
+
+/// A function definition: signature plus body.
+#[derive(Debug, PartialEq, Clone)]
+pub struct Func<'a> {
+    /// Signature tuple. Judging by the element types this is (function name,
+    /// list of (parameter name, parameter type), return type); confirm against the parser.
+    pub sig: (SpanId<'a>, Vec<(SpanId<'a>, Type)>, Type),
+    /// The commands making up the function body.
+    pub body: Block<'a>,
+}
+
+/// A top-level item of a program; functions are the only kind so far.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Item<'a> {
+    /// A function definition.
+    Func(Func<'a>),
+}
+
+pub type Prog<'a> = Vec<Item<'a>>;
--- a/src/interpreter.rs
+++ b/src/interpreter.rs
+// Interpreter
+use crate::ast::{Expr, Op, SpanExpr};
+
+/// Evaluates an arithmetic expression tree to an `i32`.
+///
+/// Supported forms are `Num` literals, parenthesised expressions (`Par`),
+/// the binary operators `Add`, `Sub`, `Mul`, `Div` and `Pow`, and the unary
+/// operators `Add` (identity) and `Sub` (negation).
+///
+/// # Panics
+///
+/// Panics via `unimplemented!()` for any other expression or operator.
+/// Integer division by zero panics as well. The exponent of `Pow` is cast
+/// with `as u32`, so a negative exponent is reinterpreted as a large
+/// unsigned value.
+pub fn eval_expr(e: &SpanExpr) -> i32 {
+    // Match through the reference: cloning `e` here would deep-copy the whole
+    // subtree again at every level of the recursion.
+    match &e.1 {
+        Expr::Num(i) => *i,
+        Expr::Par(inner) => eval_expr(inner),
+        Expr::BinOp(op, l, r) => {
+            let lv = eval_expr(l);
+            let rv = eval_expr(r);
+            match op {
+                Op::Add => lv + rv,
+                Op::Sub => lv - rv,
+                Op::Mul => lv * rv,
+                Op::Div => lv / rv,
+                Op::Pow => lv.pow(rv as u32),
+                _ => unimplemented!(),
+            }
+        }
+        Expr::UnaryOp(op, operand) => {
+            let value = eval_expr(operand);
+            match op {
+                Op::Add => value,
+                Op::Sub => -value,
+                _ => unimplemented!(),
+            }
+        }
+        _ => unimplemented!(),
+    }
+}
+
+// use crate::ast::{Binop, Cmd, Constant, Expr, Item, Prog, TypeDecl};
+// use crate::check::{check_prog, Fenv, Tenv};
+// use crate::parse::parse_prog;
+
+//use std::collections::HashMap;
+
+// pub type Addr = u32;
+
+// #[derive(Debug, PartialEq, Clone)]
+// pub enum Data {
+//     Value(Constant),
+//     Pointer(Addr),
+// }
+
+// pub type Venv = HashMap<String, Addr>;
+// pub type Menv = HashMap<Addr, Data>;
+
+// #[derive(Debug, PartialEq, Clone)]
+// pub struct Mem {
+//     pub Addr: u32,
+//     pub Menv: Menv,
+// }
+
+// impl Mem {
+//     fn new() -> Mem {
+//         Mem {
+//             Addr: 0,
+//             Menv: Menv::new(),
+//         }
+//     }
+
+//     fn alloc(&mut self) -> Addr {
+//         // allocate a new address
+//         self.Addr += 1;
+//         self.Addr
+//     }
+// }
+
+// pub fn get_bool(d: Data) -> bool {
+//     if let Data::Value(Constant::Boolean(b)) = d {
+//         b
+//     } else {
+//         panic!("cannot evaluate into Boolean");
+//     }
+// }
+
+// pub fn get_i32(d: Data) -> i32 {
+//     if let Data::Value(Constant::Num(i)) = d {
+//         i
+//     } else {
+//         panic!("cannot evaluate into i32");
+//     }
+// }
+
+// pub fn eval_expr(exp: &Expr, mem: &mut Mem, venv: &Venv, fenv: &Fenv) -> Data {
+//     println!("\neval_expr {:?}, mem {:?}, venv {:?}", exp, mem, venv);
+//     match exp {
+//         Expr::Constant(c) => Data::Value(c.clone()),
+//         Expr::Binop(e1, op, e2) => {
+//             let ev1 = eval_expr(e1, mem, venv, fenv);
+//             let ev2 = eval_expr(e2, mem, venv, fenv);
+//             Data::Value(match op {
+//                 Binop::And => Constant::Boolean(get_bool(ev1) && get_bool(ev2)),
+//                 Binop::Or => Constant::Boolean(get_bool(ev1) || get_bool(ev2)),
+//                 Binop::Equal => Constant::Boolean(ev1 == ev2),
+//                 Binop::Less => Constant::Boolean(get_i32(ev1) < get_i32(ev2)),
+//                 Binop::LessEqual => Constant::Boolean(get_i32(ev1) <= get_i32(ev2)),
+//                 Binop::Greater => Constant::Boolean(get_i32(ev1) > get_i32(ev2)),
+//                 Binop::GreaterEqual => Constant::Boolean(get_i32(ev1) >= get_i32(ev2)),
+//                 Binop::Divide => Constant::Num(get_i32(ev1) / get_i32(ev2)),
+//                 Binop::Reminder => Constant::Num(get_i32(ev1) % get_i32(ev2)),
+//                 Binop::Minus => Constant::Num(get_i32(ev1) - get_i32(ev2)),
+//                 Binop::Plus => Constant::Num(get_i32(ev1) + get_i32(ev2)),
+//                 Binop::Times => Constant::Num(get_i32(ev1) * get_i32(ev2)),
+//             })
+//         }
+//         Expr::Id(id) => {
+//             let e = mem.Menv.get(venv.get(id).unwrap()).unwrap();
+//             println!("{:?} -> {:?}", id, e);
+//             e.to_owned()
+//         }
+//         Expr::Not(e) => {
+//             let ev = eval_expr(e, mem, venv, fenv);
+//             Data::Value(Constant::Boolean(!get_bool(ev)))
+//         }
+//         Expr::Application(id, exprs) => {
+//             // evaluate arguments
+//             println!("application {:?}", id);
+//             let args: Vec<Data> = exprs
+//                 .into_iter()
+//                 .map(|e| eval_expr(e, mem, venv, fenv))
+//                 .collect();
+//             println!("args {:?}", args);
+//             // lookup callee
+//             let f = fenv.get(id).unwrap();
+//             println!("f {:?}", &f);
+//             let parameter_names: Vec<String> =
+//                 f.sig.1.clone().into_iter().map(|idt| idt.0).collect();
+//             println!("f par_names {:?}", &parameter_names);
+//             let mut lenv = Venv::new(); // local environment for function application
+//             let arg_assign: Vec<(String, Data)> =
+//                 parameter_names.into_iter().zip(args.into_iter()).collect();
+
+//             println!("arg assignments {:?}", &arg_assign);
+
+//             for (id, val) in arg_assign {
+//                 let addr = mem.alloc(); // get new allocation slot
+//                 mem.Menv.insert(addr, val); // write the new value
+//                 lenv.insert(id, addr);
+//             }
+//             println!("local enviroment {:?}", &lenv);
+//             println!("memory {:?}", &mem);
+
+//             // execute function, unwrap the result as we need a Constant
+//             eval_body(f.body.clone(), mem, &mut lenv, fenv).unwrap()
+//         }
+//         Expr::Ref(exp) => {
+//             println!("here");
+//             match *exp.to_owned() {
+//                 Expr::Id(id) => {
+//                     println!("id {:?}", &id);
+//                     let addr = venv.get(&id).unwrap();
+//                     Data::Pointer(*addr)
+//                 }
+//                 _ => {
+//                     let val = eval_expr(exp, mem, venv, fenv);
+//                     println!("-- value {:?}", &val);
+//                     let addr = mem.alloc(); // get new allocation slot
+//                     mem.Menv.insert(addr, val.to_owned()); // write the new value
+//                     let ref_val = Data::Pointer(addr);
+//                     println!(
+//                         "Ref exp {:?} e {:?} mem {:?} venv {:?}",
+//                         exp, val, mem, venv
+//                     );
+//                     ref_val
+//                 }
+//             }
+//         }
+//         Expr::RefMut(exp) => {
+//             println!("here");
+//             match *exp.to_owned() {
+//                 Expr::Id(id) => {
+//                     println!("id {:?}", &id);
+//                     let addr = venv.get(&id).unwrap();
+//                     Data::Pointer(*addr)
+//                 }
+//                 _ => {
+//                     let val = eval_expr(exp, mem, venv, fenv);
+//                     println!("-- value {:?}", &val);
+//                     let addr = mem.alloc(); // get new allocation slot
+//                     mem.Menv.insert(addr, val.to_owned()); // write the new value
+//                     let ref_val = Data::Pointer(addr);
+//                     println!(
+//                         "Ref exp {:?} e {:?} mem {:?} venv {:?}",
+//                         exp, val, mem, venv
+//                     );
+//                     ref_val
+//                 }
+//             }
+//         }
+//         Expr::Deref(exp) => {
+//             println!("-- Deref");
+//             let e = eval_expr(exp, mem, venv, fenv);
+//             println!("-- DereRef {:?} {:?}", exp, e);
+//             if let Data::Pointer(addr) = e {
+//                 mem.Menv.get(&addr).unwrap().to_owned()
+//             } else {
+//                 panic!("cannot deref {:?}", e);
+//             }
+//         }
+//         _ => unimplemented!(),
+//     }
+// }
+// pub fn eval_lvalue(exp: &Expr, mem: &mut Mem, venv: &Venv, fenv: &Fenv) -> Addr {
+//     println!("eval_lvalue {:?},{:?},{:?},{:?} ", exp, mem, venv, fenv);
+//     match exp {
+//         Expr::Id(id) => {
+//             let addr = venv.get(id).unwrap();
+//             println!("addr {:?}", addr);
+//             addr.to_owned()
+//         }
+//         Expr::Deref(exp) => {
+//             let lv = eval_expr(exp, mem, venv, fenv);
+//             println!("lv {:?}", lv);
+//             match eval_expr(exp, mem, venv, fenv) {
+//                 Data::Pointer(addr) => addr,
+//                 _ => panic!("cannot deref {:?}", exp),
+//             }
+//         }
+//         _ => unimplemented!(),
+//     }
+// }
+
+// // commands may return with a value
+// // either directly (return) or
+// // if inside an inner block (then/else, or while)
+// pub fn menv_update(data: Data, menv: &mut Menv) {
+//     // match data {
+//     //     Pointer::
+//     // }
+// }
+
+// pub fn dump(msg: &str, mem: &Mem, venv: &Venv) {
+//     println!("{:?} {:?} {:?}", msg, mem, venv);
+// }
+
+// // A return generates Some(Data) else None
+// pub fn eval_cmd(cmd: &Cmd, mem: &mut Mem, venv: &mut Venv, fenv: &Fenv) -> Option<Data> {
+//     println!("{:?}", cmd);
+//     match cmd {
+//         Cmd::Assign(lexp, rexp) => {
+//             let rval = eval_expr(rexp, mem, venv, fenv);
+//             println!("val {:?}", rval);
+//             let addr = eval_lvalue(lexp, mem, venv, fenv);
+//             // println!("lval {:?}", lval);
+//             // let addr = venv.get(&lval).unwrap();
+
+//             mem.Menv.insert(addr, rval);
+//             None
+//         }
+//         Cmd::If(exp, then_block, opt_else) => {
+//             if get_bool(eval_expr(exp, mem, venv, fenv)) {
+//                 eval_body(then_block.to_vec(), mem, venv, fenv)
+//             } else {
+//                 if let Some(else_block) = opt_else {
+//                     eval_body(else_block.to_vec(), mem, venv, fenv)
+//                 } else {
+//                     None
+//                 }
+//             }
+//         }
+//         Cmd::Let(_, id, _, exp) => {
+//             let val = eval_expr(exp, mem, venv, fenv);
+//             println!("val {:?}", val);
+
+//             let addr = mem.alloc(); // get new allocation slot
+//             mem.Menv.insert(addr, val); // write the new value
+//             venv.insert(id.to_owned(), addr);
+//             dump("after Let", mem, venv);
+//             None
+//         }
+//         Cmd::Return(exp) => {
+//             let v = Some(eval_expr(exp, mem, venv, fenv));
+//             println!("return value {:?}", v);
+//             v
+//         }
+//         Cmd::While(exp, body) => {
+//             while get_bool(eval_expr(exp, mem, venv, fenv)) {
+//                 if let Some(retv) = eval_body(body.to_vec(), mem, venv, fenv) {
+//                     return Some(retv);
+//                 }
+//             }
+//             None
+//         }
+//     }
+// }
+
+// pub fn eval_body(cmds: Vec<Cmd>, mem: &mut Mem, venv: &mut Venv, fenv: &Fenv) -> Option<Data> {
+//     for c in &cmds {
+//         if let Some(ret) = eval_cmd(c, mem, venv, fenv) {
+//             return Some(ret);
+//         }
+//     }
+//     None
+// }
+
+// pub fn build_env(prog: Prog) -> (Tenv, Fenv) {
+//     let mut tenv = Tenv::new();
+//     let mut fenv = Fenv::new();
+
+//     for i in prog {
+//         match i {
+//             Item::TypeDecl(TypeDecl::Struct(id, layout)) => {
+//                 tenv.insert(id.clone(), TypeDecl::Struct(id, layout));
+//             }
+//             Item::Function(f) => {
+//                 fenv.insert(f.sig.0.to_owned(), f);
+//             }
+//         }
+//     }
+//     (tenv, fenv)
+// }
+
+// pub fn eval_prog(prog: &str) {
+//     let (unparsed, prog) = parse_prog(prog).unwrap();
+//     println!("prog: {:?}", prog);
+//     println!("unparsed: {:?}", unparsed);
+//     let (tenv, fenv) = check_prog(&prog);
+
+//     println!("envs {:?}", (tenv, &fenv));
+
+//     // assume main does not take any parameters
+//     let call_main = Expr::Application("main".to_owned(), Vec::<Expr>::new());
+
+//     let mut mem = Mem::new();
+//     let mut venv = Venv::new();
+//     let ret = eval_expr(&call_main, &mut mem, &mut venv, &fenv);
+//     println!("return from main = {:?}", ret);
+
+//     println!("venv = {:?}", venv);
+//     println!("mem = {:?}", mem);
+// }
--- a/src/lib.rs
+++ b/src/lib.rs
 // lib

 pub mod ast;
+pub mod interpreter;
 pub mod parse;
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -6,16 +6,20 @@ use std::slice::Iter;
 use nom::{
    branch::alt,
    bytes::complete::tag,
-    character::complete::char,
-    character::complete::{digit1, multispace0},
-    combinator::{cut, map},
+    character::complete::{
+        alpha1, alphanumeric0, char, digit1, multispace0, multispace1,
+    },
+    combinator::{cut, map, opt},
    error::ParseError,
-    multi::many1,
-    sequence::{delimited, preceded},
+    multi::{many1, separated_list},
+    sequence::{delimited, preceded, terminated, tuple},
    IResult,
 };

-use crate::ast::{Expr, Op, Span, SpanExpr};
+use crate::ast::{
+    Block, Cmd, Expr, Func, Item, Mutability, Op, Prog, Span, SpanCmd,
+    SpanExpr, SpanId, Type,
+};

 pub fn parse_i32(i: Span) -> IResult<Span, (Span, i32)> {
    map(digit1, |digit_str: Span| {
@@ -41,15 +45,42 @@ fn parse_op(i: Span) -> IResult<Span, (Span, Op)> {
 #[derive(Debug, Clone, PartialEq)]
 pub enum Token<'a> {
    Num(i32),
+    Bool(bool),
+    Id(String),
+    Call(String, Vec<(Span<'a>, Vec<SpanToken<'a>>)>),
    Par(Vec<SpanToken<'a>>),
    Op(Op),
 }

 type SpanToken<'a> = (Span<'a>, Token<'a>);

+/// Parses an identifier: optional leading whitespace, then one or more
+/// alphabetic characters (head) followed by zero or more alphanumeric
+/// characters (tail).
+///
+/// The returned span covers exactly the identifier text. Leading whitespace
+/// is consumed but is not part of the span, so the span's position and
+/// fragment both refer to the first character of the identifier.
+pub fn parse_id(i: Span) -> IResult<Span, Span> {
+    map(
+        // `tag("")` matches the empty string; its span marks where the
+        // identifier ends.
+        preceded(multispace0, tuple((alpha1, alphanumeric0, tag("")))),
+        |(head, _, end): (Span, Span, Span)| {
+            // Offsets are relative to the original input, so subtracting
+            // `i.offset` gives indices into `i.fragment`. Starting at `head`
+            // rather than at `i` keeps the skipped whitespace out of the name.
+            let start = head.offset - i.offset;
+            let stop = end.offset - i.offset;
+            let mut res = head;
+            res.fragment = &i.fragment[start..stop];
+            res
+        },
+    )(i)
+}
+
 fn parse_terminal(i: Span) -> IResult<Span, SpanToken> {
    alt((
        map(parse_i32, |(s, v)| (s, Token::Num(v))),
+        map(tag("true"), |s| (s, Token::Bool(true))),
+        map(tag("false"), |s| (s, Token::Bool(false))),
+        map(
+            tuple((
+                parse_id,
+                parse_par(separated_list(char(','), parse_tokens)),
+            )),
+            |(s, t)| (s, Token::Call(s.to_string(), t)),
+        ),
+        map(parse_id, |s: Span| (s, Token::Id(s.to_string()))),
        map(parse_par(parse_tokens), |(s, tokens)| {
            (s, Token::Par(tokens))
        }),
@@ -71,8 +102,20 @@ fn parse_tokens(i: Span) -> IResult<Span, (Span, Vec<SpanToken>)> {
 fn compute_atom<'a>(t: &mut Peekable<Iter<SpanToken<'a>>>) -> SpanExpr<'a> {
    match t.next() {
        Some((s, Token::Num(i))) => (*s, Expr::Num(*i)),
+        Some((s, Token::Bool(b))) => (*s, Expr::Bool(*b)),
+        Some((s, Token::Id(id))) => (*s, Expr::Id(id.to_string())),
        Some((_, Token::Par(v))) => climb(&mut v.iter().peekable(), 0),
-        Some((s, Token::Op(op))) => (*s, Expr::UnaryOp(*op, Box::new(climb(t, 4)))), // assume highest precedence
+        Some((s, Token::Call(id, vv))) => {
+            //
+            let v: Vec<SpanExpr> = vv
+                .iter()
+                .map(|(span, t)| climb(&mut (*t).iter().peekable(), 0))
+                .collect();
+            (*s, Expr::Call(id.to_string(), v))
+        }
+        Some((s, Token::Op(op))) => {
+            (*s, Expr::UnaryOp(*op, Box::new(climb(t, 4))))
+        } // assume highest precedence
        _ => panic!("error in compute atom"),
    }
 }
@@ -107,62 +150,160 @@ fn climb<'a>(
    result
 }

-pub fn test(s: &str, v: i32) {
-    match parse_tokens(Span::new(s)) {
-        Ok((Span { fragment: "", .. }, (_, t))) => {
-            let mut t = t.iter().peekable();
-            println!("{:?}", &t);
-            let e = climb(&mut t, 0);
-            println!("{:?}", &e);
-            println!("eval {} {}", math_eval(&e), v);
-            assert_eq!(math_eval(&e), v);
+pub fn parse_expr(i: Span) -> IResult<Span, SpanExpr> { // Parses an expression into a SpanExpr.
+    map(parse_tokens, |(_, tokens)| { // tokenize first, dropping the span of the token list
+        climb(&mut tokens.iter().peekable(), 0) // build the tree; 0 is presumably the lowest precedence
+    })(i)
 }
-        Ok((s, t)) => println!(
-            "parse incomplete, \n parsed tokens \t{:?}, \n remaining \t{:?}",
-            t, s
+
+fn parse_if(i: Span) -> IResult<Span, Cmd> { // Parses `if <expr> <block> [else <block>]` into Cmd::If.
+    map(
+        preceded(
+            // `if` must be followed by at least one whitespace character, so that
+            // identifiers merely starting with `if` are not taken for the keyword;
+            // after the keyword, `cut` turns any recoverable error into a failure
+            terminated(tag("if"), multispace1),
+            cut(tuple((
+                parse_expr,
+                parse_block,
+                opt(preceded(preceded(multispace0, tag("else")), parse_block)),
+            ))),
        ),
-        Err(err) => println!("{:?}", err),
+        |(pred, true_branch, maybe_false_branch)| {
+            Cmd::If(pred, true_branch, maybe_false_branch)
+        },
+    )(i)
 }
+
+pub fn parse_let(i: Span) -> IResult<Span, Cmd> { // Parses `let [mut] <id> : <type> = <expr>` into Cmd::Let.
+    map(
+        preceded(
+            // `let` must be followed by at least one whitespace character, so that
+            // identifiers merely starting with `let` are not taken for the keyword;
+            // after the keyword, `cut` turns any recoverable error into a failure
+            terminated(tag("let"), multispace1),
+            cut(tuple((
+                opt(preceded(multispace0, terminated(tag("mut"), multispace1))),
+                parse_id,
+                preceded(preceded(multispace0, tag(":")), parse_type), // the type annotation is mandatory
+                preceded(preceded(multispace0, tag("=")), parse_expr),
+            ))),
+        ),
+        |(m, id, t, expr)| {
+            Cmd::Let(
+                if m.is_some() {
+                    Mutability::Mut
+                } else {
+                    Mutability::Imm
+                },
+                id,
+                t,
+                expr,
+            )
+        },
+    )(i)
 }

-// helpers
-fn parse_par<'a, O, F, E>(
-    inner: F,
-) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
-where
-    F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
-    E: ParseError<Span<'a>>,
-{
-    // delimited allows us to split up the input
-    // cut allwos us to consume the input (and prevent backtracking)
-    delimited(char('('), preceded(multispace0, inner), cut(char(')')))
+pub fn parse_assign<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> { // Parses `<expr> = <expr>` into Cmd::Assign.
+    map(
+        // an assignment has no leading keyword; the left-hand side is parsed as a
+        // general expression, not only as an identifier. No `cut` is applied here,
+        // so a recoverable error from the sub-parsers stays recoverable for the caller
+        tuple((
+            parse_expr,
+            preceded(preceded(multispace0, tag("=")), parse_expr),
+        )),
+        |(id_expr, expr)| Cmd::Assign(id_expr, expr),
+    )(i)
 }

-fn math_eval(e: &SpanExpr) -> i32 {
-    match e.clone().1 {
-        Expr::Num(i) => i,
-        Expr::BinOp(op, l, r) => {
-            let lv = math_eval(&l);
-            let rv = math_eval(&r);
-            match op {
-                Op::Add => lv + rv,
-                Op::Sub => lv - rv,
-                Op::Mul => lv * rv,
-                Op::Div => lv / rv,
-                Op::Pow => lv.pow(rv as u32),
-                _ => unimplemented!(),
+pub fn parse_return(i: Span) -> IResult<Span, Cmd> { // Parses `return <expr>` into Cmd::Return.
+    map(
+        preceded(terminated(tag("return"), multispace1), parse_expr), // whitespace after `return` is required
+        Cmd::Return, // the variant constructor is used directly as the mapping function
+    )(i)
+}
+
+pub fn parse_while(i: Span) -> IResult<Span, Cmd> { // Parses `while <expr> <block>` into Cmd::While.
+    map(
+        preceded(
+            // `while` must be followed by at least one whitespace character, so that
+            // identifiers merely starting with `while` are not taken for the keyword;
+            // after the keyword, `cut` turns any recoverable error into a failure
+            terminated(tag("while"), multispace1),
+            cut(tuple((parse_expr, parse_block))),
+        ),
+        |(pred, body)| Cmd::While(pred, body),
+    )(i)
 }
+
+/// Parses a single command (while, let, if, assignment or return), skipping leading whitespace.
+pub fn parse_cmd(i: Span) -> IResult<Span, Cmd> {
+    preceded(
+        multispace0,
+        // `alt` tries the alternatives in the listed order and returns the first success
+        alt((parse_while, parse_let, parse_if, parse_assign, parse_return)),
+    )(i)
 }
-        Expr::UnaryOp(op, e) => {
-            let e = math_eval(&e);
-            match op {
-                Op::Add => e,
-                Op::Sub => -e,
-                _ => unimplemented!(),
+
+pub fn parse_block(i: Span) -> IResult<Span, Block> { // Parses `{ <cmd> ; <cmd> ; ... }` after optional whitespace.
+    preceded(multispace0, parse_sem(separated_list(tag(";"), parse_cmd)))(i) // parse_sem supplies the braces
+}
+
+pub fn parse_type(i: Span) -> IResult<Span, Type> { // Parses a type: `i32`, `bool`, `&<type>` or `mut <type>`.
+    preceded(
+        multispace0, // leading whitespace is skipped, also on each recursive call
+        alt((
+            map(tag("i32"), |_| Type::I32),
+            map(tag("bool"), |_| Type::Bool),
+            map(preceded(tag("&"), parse_type), |t| Type::Ref(Box::new(t))), // `&T` becomes Ref(T)
+            map(
+                preceded(terminated(tag("mut"), multispace1), parse_type),
+                |t| Type::Mut(Box::new(t)), // `mut T` becomes Mut(T), so `&mut T` is Ref(Mut(T))
+            ),
+        )),
+    )(i)
 }
+
+pub fn parse_par_decls(i: Span) -> IResult<Span, Vec<(SpanId, Type)>> { // Parses `( <decl> , <decl> , ... )`.
+    parse_par(separated_list(tag(","), parse_par_decl))(i) // parse_par supplies the parentheses
 }
-        _ => unimplemented!(),
+
+pub fn parse_par_decl(i: Span) -> IResult<Span, (SpanId, Type)> { // Parses one parameter: `[mut] <id> : <type>`.
+    map(
+        tuple((
+            opt(preceded(multispace0, terminated(tag("mut"), multispace1))),
+            parse_id,
+            preceded(multispace0, tag(":")),
+            parse_type,
+        )),
+        |(_, id, _, t)| (id, t), // the optional `mut` is accepted but not recorded in the result
+    )(i)
+}
+
+pub fn parse_function_decl(i: Span) -> IResult<Span, Func> { // Parses `fn <id>(<params>) [-> <type>] <block>`.
+    map(
+        preceded(
+            preceded(multispace0, terminated(tag("fn"), multispace1)),
+            tuple((
+                parse_id,
+                parse_par_decls, // the parenthesised parameter list, via the shared helper
+                opt(preceded(
+                    preceded(multispace0, tag("->")), // parse_type skips any whitespace after `->`
+                    parse_type,
+                )),
+                parse_block,
+            )),
+        ),
+        |(id, par, ret, body)| Func {
+            sig: (id, par, ret.unwrap_or(Type::Unit)), // a missing `-> <type>` means Type::Unit
+            body,
+        },
+    )(i)
 }
+
+pub fn parse_prog(i: Span) -> IResult<Span, Prog> { // Parses function declarations, each wrapped in Item::Func.
+    separated_list(multispace0, map(parse_function_decl, Item::Func))(i) // NOTE(review): separator may match "" - confirm nom accepts that
 }

 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -181,3 +322,36 @@ fn get_prec(op: &Op) -> (u8, Ass) {
        _ => unimplemented!(),
    }
 }
+
+// helpers
+fn parse_par<'a, O, F, E>( // Wraps `inner` so that it parses `( <inner> )`.
+    inner: F,
+) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
+where
+    F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
+    E: ParseError<Span<'a>>,
+{
+    // delimited keeps only the result of `inner` and discards the parentheses;
+    // cut turns a missing `)` into a failure, which prevents backtracking
+    delimited(
+        char('('),
+        preceded(multispace0, inner),
+        cut(preceded(multispace0, char(')'))),
+    )
+}
+
+fn parse_sem<'a, O, F, E>( // Wraps `inner` so that it parses `{ <inner> }` (braces, despite the name).
+    inner: F,
+) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
+where
+    F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
+    E: ParseError<Span<'a>>,
+{
+    // delimited keeps only the result of `inner` and discards the braces;
+    // cut turns a missing `}` into a failure, which prevents backtracking
+    delimited(
+        char('{'),
+        preceded(multispace0, inner),
+        cut(preceded(multispace0, char('}'))),
+    )
+}
No results found