Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
  • mikala3/d7050e
  • 97gushan/d7050e
  • Abrikot/d7050e
  • Hammarkvast/d7050e
  • banunkers/d7050e
  • markhakansson/d7050e
  • inaule-6/d7050e
  • pln/d7050e
  • widforss/d7050e
  • arostr-5/d7050e
  • Grumme2/d7050e
  • brathen/d7050e
12 results
Select Git revision
Loading items
Show changes
Commits on Source (13)
...@@ -62,6 +62,18 @@ ...@@ -62,6 +62,18 @@
"kind": "build", "kind": "build",
"isDefault": true "isDefault": true
} }
},
{
"type": "shell",
"label": "cargo run --example crust",
"command": "cargo run --example crust",
"problemMatcher": [
"$rustc"
],
"group": {
"kind": "build",
"isDefault": true
}
} }
] ]
} }
\ No newline at end of file
...@@ -9,3 +9,4 @@ edition = "2018" ...@@ -9,3 +9,4 @@ edition = "2018"
[dependencies] [dependencies]
nom = "5.0.1" nom = "5.0.1"
nom_locate = "1.0.0" nom_locate = "1.0.0"
inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "llvm8-0" }
...@@ -6,7 +6,7 @@ The repo will be updated througout the course and includes a draft outline of th ...@@ -6,7 +6,7 @@ The repo will be updated througout the course and includes a draft outline of th
Fundamental theories about computation and different models of computation. Construction of compilers. Lexical analysis, syntax analysis, and translation into abstract syntax. Regular expressions and grammars, context-free languages and grammars, lexer and parser generators. Identifier handling and symbol table organization. Type-checking, logical inference systems. Intermediate representations and transformations for different languages. Code optimization and register allocation. Machine code generation for common architectures. Fundamental theories about computation and different models of computation. Construction of compilers. Lexical analysis, syntax analysis, and translation into abstract syntax. Regular expressions and grammars, context-free languages and grammars, lexer and parser generators. Identifier handling and symbol table organization. Type-checking, logical inference systems. Intermediate representations and transformations for different languages. Code optimization and register allocation. Machine code generation for common architectures.
In the course you will learn and develop your skills through hands-on implementation work building your own compiler from scratch. In this way theoretical aspects such as formal grammars, Structural Operational Semantics (SOS), and type rule formalisations become tangible. We will even touch upon memory safety and how guarantees can be achieved through static (compilet time) borrow checking. Compiler backend (code optimization etc.) will be discussed in the context of LLVM, which you will optionally interface as a library for code generation. In the course you will learn and develop your skills through hands-on implementation work building your own compiler from scratch. In this way theoretical aspects such as formal grammars, Structural Operational Semantics (SOS), and type rule formalisations become tangible. We will even touch upon memory safety and how guarantees can be achieved through static (compile time) borrow checking. Compiler backend (code optimization etc.) will be discussed in the context of LLVM, which you will optionally interface as a library for code generation.
## Draft outline ## Draft outline
......
...@@ -149,9 +149,7 @@ fn main() { ...@@ -149,9 +149,7 @@ fn main() {
} }
// helpers // helpers
fn parse_par<'a, O, F, E>( fn parse_par<'a, O, F, E>(inner: F) -> impl Fn(&'a str) -> IResult<&'a str, O, E>
inner: F,
) -> impl Fn(&'a str) -> IResult<&'a str, O, E>
where where
F: Fn(&'a str) -> IResult<&'a str, O, E>, F: Fn(&'a str) -> IResult<&'a str, O, E>,
E: ParseError<&'a str>, E: ParseError<&'a str>,
......
This diff is collapsed.
extern crate inkwell;
use inkwell::builder::Builder;
use inkwell::context::Context;
use inkwell::execution_engine::{ExecutionEngine, JitFunction};
use inkwell::module::Module;
use inkwell::OptimizationLevel;
use std::error::Error;
/// Convenience type alias for the JIT-compiled `sum` function.
///
/// The signature takes three `u64` arguments and returns a `u64`, using the
/// C calling convention.
///
/// Calling this is innately `unsafe` because there's no guarantee it doesn't
/// do `unsafe` operations internally.
type SumFunc = unsafe extern "C" fn(u64, u64, u64) -> u64;
/// Emits a three-argument `sum` function into `module` and fetches it from
/// the JIT execution engine.
///
/// The generated function has the LLVM signature `i64 (i64, i64, i64)` and
/// returns the sum of its three parameters.
///
/// Returns `None` if any of the three parameters cannot be retrieved from the
/// freshly added function, or if the execution engine cannot find `sum`.
fn jit_compile_sum(
    context: &Context,
    module: &Module,
    builder: &Builder,
    execution_engine: &ExecutionEngine,
) -> Option<JitFunction<SumFunc>> {
    // Declare `sum` with three 64-bit integer parameters and a 64-bit result.
    let int_ty = context.i64_type();
    let signature =
        int_ty.fn_type(&[int_ty.into(), int_ty.into(), int_ty.into()], false);
    let sum_fn = module.add_function("sum", signature, None);

    // All instructions go into a single entry block.
    let entry = context.append_basic_block(&sum_fn, "entry");
    builder.position_at_end(&entry);

    let first = sum_fn.get_nth_param(0)?.into_int_value();
    let second = sum_fn.get_nth_param(1)?.into_int_value();
    let third = sum_fn.get_nth_param(2)?.into_int_value();

    // (first + second) + third
    let partial = builder.build_int_add(first, second, "sum");
    let total = builder.build_int_add(partial, third, "sum");
    builder.build_return(Some(&total));

    // The lookup is `unsafe` in inkwell's API; the caller-visible type is
    // fixed by `SumFunc`.
    unsafe { execution_engine.get_function("sum").ok() }
}
/// Creates the LLVM context, module, builder and JIT engine, compiles `sum`,
/// then prints and asserts the result of calling it with 1, 2 and 3.
///
/// Returns an error if the execution engine cannot be created or if `sum`
/// could not be JIT compiled.
fn main() -> Result<(), Box<dyn Error>> {
    let ctx = Context::create();
    let sum_module = ctx.create_module("sum");
    let ir_builder = ctx.create_builder();
    let engine =
        sum_module.create_jit_execution_engine(OptimizationLevel::None)?;

    let sum_fn = jit_compile_sum(&ctx, &sum_module, &ir_builder, &engine)
        .ok_or("Unable to JIT compile `sum`")?;

    let (x, y, z) = (1u64, 2u64, 3u64);

    // Calling a JIT-compiled function is `unsafe`: the compiler cannot check
    // that the generated code matches the `SumFunc` signature.
    unsafe {
        println!("{} + {} + {} = {}", x, y, z, sum_fn.call(x, y, z));
        assert_eq!(sum_fn.call(x, y, z), x + y + z);
    }

    Ok(())
}
...@@ -81,7 +81,6 @@ fn test_parse_i32_1() { ...@@ -81,7 +81,6 @@ fn test_parse_i32_1() {
); );
} }
fn main() { fn main() {
let (a, b) = parse_expr(Span::new("1")).unwrap(); let (a, b) = parse_expr(Span::new("1")).unwrap();
println!("{:?}", parse_expr(Span::new("1"))); println!("{:?}", parse_expr(Span::new("1")));
......
...@@ -61,7 +61,11 @@ pub fn parse_i32<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> { ...@@ -61,7 +61,11 @@ pub fn parse_i32<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> {
let (i, digits) = digit1(i)?; let (i, digits) = digit1(i)?;
match digits.fragment.parse() { match digits.fragment.parse() {
Ok(int) => Ok((i, (digits, Expr::Num(int)))), Ok(int) => Ok((i, (digits, Expr::Num(int)))),
Err(e) => Err(Err::Failure(Error(i, Some(digits), ErrorKind::ParseIntError(e)))), Err(e) => Err(Err::Failure(Error(
i,
Some(digits),
ErrorKind::ParseIntError(e),
))),
} }
} }
......
extern crate nom; use crust::{
ast::Span,
interpreter::eval_expr,
parse::{parse_assign, parse_expr, parse_prog},
};
/// Parses `s` as an expression, evaluates it and asserts the value equals `v`.
///
/// The assertion only runs when the parser consumed the whole input (the
/// remaining fragment is empty). If input is left over, or parsing fails, the
/// outcome is printed and the function returns without asserting.
///
/// # Panics
///
/// Panics if the evaluated value differs from `v`, or if `eval_expr` panics.
fn test(s: &str, v: i32) {
    match parse_expr(Span::new(s)) {
        // Complete parse: nothing left in the remaining span.
        Ok((Span { fragment: "", .. }, e)) => {
            println!("{:?}", &e);
            // Evaluate once and reuse the value for both the log line and the
            // assertion instead of walking the expression tree twice.
            let value = eval_expr(&e);
            println!("eval {} {}", value, v);
            assert_eq!(value, v);
        }
        // Partial parse: report what was parsed and what is left.
        Ok((s, t)) => println!(
            "parse incomplete, \n parsed tokens \t{:?}, \n remaining \t{:?}",
            t, s
        ),
        Err(err) => println!("{:?}", err),
    }
}
use crust::parse::test; fn test_expr() {
// test("- -1 + + 1", - -1 + 1); // rust does not allow + as a unary op (I do ;)
// test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3);
// // just to check that right associative works (you don't need to implement pow)
// test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1);
// test("(12*2)/3-4", (12 * 2) / 3 - 4);
// test("1*2+3", 1 * 2 + 3);
// // just to check that we get a parse error
// test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2);
test("1 + (1 - 2)", 1 + (1 - 2));
}
fn main() { fn main() {
test("- -1 + + 1", - -1 + 1); // rust does not allow + as a unary op (I do ;) // println!("{:?}", parse_assign(Span::new("3 = a(1, 2+3)")));
test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3); println!(
// just to check that right associative works (you don't need to implement pow) "{:?}",
test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1); parse_prog(Span::new(
test("(12*2)/3-4", (12 * 2) / 3 - 4); "
test("1*2+3", 1 * 2 + 3); fn main() { let a:i32 = 1} "
// just to check that we get a parse error ))
test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2); );
} }
...@@ -20,14 +20,67 @@ pub enum Op { ...@@ -20,14 +20,67 @@ pub enum Op {
type SpanOp<'a> = (Span<'a>, Op); type SpanOp<'a> = (Span<'a>, Op);
pub type SpanId<'a> = Span<'a>;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Expr<'a> { pub enum Expr<'a> {
Num(i32), Num(i32),
Bool(bool),
Par(Box<SpanExpr<'a>>), Par(Box<SpanExpr<'a>>),
// Identifier Id(String),
// Function application Call(String, Vec<SpanExpr<'a>>),
BinOp(Op, Box<SpanExpr<'a>>, Box<SpanExpr<'a>>), BinOp(Op, Box<SpanExpr<'a>>, Box<SpanExpr<'a>>),
UnaryOp(Op, Box<SpanExpr<'a>>), UnaryOp(Op, Box<SpanExpr<'a>>),
} }
pub type SpanExpr<'a> = (Span<'a>, Expr<'a>); pub type SpanExpr<'a> = (Span<'a>, Expr<'a>);
/// A command (statement) of the language.
#[derive(Debug, PartialEq, Clone)]
pub enum Cmd<'a> {
/// `let <mut> id : <& <mut>>Type = expr`
/// Fields: mutability, identifier, declared type, initializer expression.
Let(Mutability, SpanId<'a>, Type, SpanExpr<'a>),
/// `lhs = expr`
/// Fields: left-hand side expression, right-hand side expression.
Assign(SpanExpr<'a>, SpanExpr<'a>),
/// `if predicate do-this` (and optionally `else do-that`)
/// Fields: predicate, then-block, optional else-block.
If(SpanExpr<'a>, Block<'a>, Option<Block<'a>>),
/// `while predicate do-this`
/// Fields: predicate, loop body.
While(SpanExpr<'a>, Block<'a>),
/// `return expr`
Return(SpanExpr<'a>),
}
/// A command paired with a `Span`.
pub type SpanCmd<'a> = (Span<'a>, Cmd<'a>);
/// Whether a `let` binding was declared with `mut`.
#[derive(Debug, PartialEq, Clone)]
pub enum Mutability {
/// Immutable (no `mut` keyword).
Imm,
/// Mutable (`mut` keyword present).
Mut,
}
/// A mutability marker paired with a `Span`.
pub type SpanMut<'a> = (Span<'a>, Mutability);
/// A list of spanned commands paired with a `Span`.
pub type SpanBlock<'a> = (Span<'a>, Vec<SpanCmd<'a>>);
/// A sequence of commands (without per-command spans).
pub type Block<'a> = Vec<Cmd<'a>>;
/// Types of the language.
#[derive(Debug, PartialEq, Clone)]
pub enum Type {
/// The `i32` type.
I32,
/// The `bool` type.
Bool,
/// The unit type.
Unit,
/// `mut T`
Mut(Box<Type>),
/// `&T`
Ref(Box<Type>),
// no structs
}
// pub type SpanType<'a> = (Span<'a>, Type<'a>);
/// A function definition: signature plus body.
#[derive(Debug, PartialEq, Clone)]
pub struct Func<'a> {
/// Signature as `(name, parameters, return type)`, where each parameter is
/// an `(identifier, type)` pair.
pub sig: (SpanId<'a>, Vec<(SpanId<'a>, Type)>, Type),
/// The commands making up the function body.
pub body: Block<'a>,
}
/// A top-level item of a program.
#[derive(Debug, PartialEq, Clone)]
pub enum Item<'a> {
/// A function definition.
Func(Func<'a>),
}
/// A program: a list of top-level items.
pub type Prog<'a> = Vec<Item<'a>>;
// Interpreter
use crate::ast::{Expr, Op, SpanExpr};
/// Evaluates an arithmetic expression to an `i32`.
///
/// Supported forms are integer literals, the binary operators `Add`, `Sub`,
/// `Mul`, `Div` and `Pow`, and the unary operators `Add` (identity) and `Sub`
/// (negation).
///
/// The expression is inspected by reference; the tree is never cloned, so the
/// work done is proportional to the number of nodes.
///
/// # Panics
///
/// * On any other expression kind or operator (`unimplemented!`).
/// * On division by zero.
/// * On arithmetic overflow when overflow checks are enabled.
pub fn eval_expr(e: &SpanExpr) -> i32 {
    match &e.1 {
        Expr::Num(i) => *i,
        Expr::BinOp(op, l, r) => {
            // Left operand is evaluated before the right one.
            let lv = eval_expr(l);
            let rv = eval_expr(r);
            match op {
                Op::Add => lv + rv,
                Op::Sub => lv - rv,
                Op::Mul => lv * rv,
                Op::Div => lv / rv,
                // NOTE: a negative exponent wraps to a large `u32` through
                // the `as` cast; negative powers are not supported.
                Op::Pow => lv.pow(rv as u32),
                _ => unimplemented!(),
            }
        }
        Expr::UnaryOp(op, e) => {
            let e = eval_expr(e);
            match op {
                Op::Add => e,
                Op::Sub => -e,
                _ => unimplemented!(),
            }
        }
        _ => unimplemented!(),
    }
}
// use crate::ast::{Binop, Cmd, Constant, Expr, Item, Prog, TypeDecl};
// use crate::check::{check_prog, Fenv, Tenv};
// use crate::parse::parse_prog;
//use std::collections::HashMap;
// pub type Addr = u32;
// #[derive(Debug, PartialEq, Clone)]
// pub enum Data {
// Value(Constant),
// Pointer(Addr),
// }
// pub type Venv = HashMap<String, Addr>;
// pub type Menv = HashMap<Addr, Data>;
// #[derive(Debug, PartialEq, Clone)]
// pub struct Mem {
// pub Addr: u32,
// pub Menv: Menv,
// }
// impl Mem {
// fn new() -> Mem {
// Mem {
// Addr: 0,
// Menv: Menv::new(),
// }
// }
// fn alloc(&mut self) -> Addr {
// // allocate a new address
// self.Addr += 1;
// self.Addr
// }
// }
// pub fn get_bool(d: Data) -> bool {
// if let Data::Value(Constant::Boolean(b)) = d {
// b
// } else {
// panic!("cannot evaluate into Boolean");
// }
// }
// pub fn get_i32(d: Data) -> i32 {
// if let Data::Value(Constant::Num(i)) = d {
// i
// } else {
// panic!("cannot evaluate into i32");
// }
// }
// pub fn eval_expr(exp: &Expr, mem: &mut Mem, venv: &Venv, fenv: &Fenv) -> Data {
// println!("\neval_expr {:?}, mem {:?}, venv {:?}", exp, mem, venv);
// match exp {
// Expr::Constant(c) => Data::Value(c.clone()),
// Expr::Binop(e1, op, e2) => {
// let ev1 = eval_expr(e1, mem, venv, fenv);
// let ev2 = eval_expr(e2, mem, venv, fenv);
// Data::Value(match op {
// Binop::And => Constant::Boolean(get_bool(ev1) && get_bool(ev2)),
// Binop::Or => Constant::Boolean(get_bool(ev1) && get_bool(ev2)),
// Binop::Equal => Constant::Boolean(ev1 == ev2),
// Binop::Less => Constant::Boolean(get_i32(ev1) < get_i32(ev2)),
// Binop::LessEqual => Constant::Boolean(get_i32(ev1) <= get_i32(ev2)),
// Binop::Greater => Constant::Boolean(get_i32(ev1) > get_i32(ev2)),
// Binop::GreaterEqual => Constant::Boolean(get_i32(ev1) >= get_i32(ev2)),
// Binop::Divide => Constant::Num(get_i32(ev1) / get_i32(ev2)),
// Binop::Reminder => Constant::Num(get_i32(ev1) % get_i32(ev2)),
// Binop::Minus => Constant::Num(get_i32(ev1) - get_i32(ev2)),
// Binop::Plus => Constant::Num(get_i32(ev1) + get_i32(ev2)),
// Binop::Times => Constant::Num(get_i32(ev1) * get_i32(ev2)),
// })
// }
// Expr::Id(id) => {
// let e = mem.Menv.get(venv.get(id).unwrap()).unwrap();
// println!("{:?} -> {:?}", id, e);
// e.to_owned()
// }
// Expr::Not(e) => {
// let ev = eval_expr(e, mem, venv, fenv);
// Data::Value(Constant::Boolean(!get_bool(ev)))
// }
// Expr::Application(id, exprs) => {
// // evaluate arguments
// println!("application {:?}", id);
// let args: Vec<Data> = exprs
// .into_iter()
// .map(|e| eval_expr(e, mem, venv, fenv))
// .collect();
// println!("args {:?}", args);
// // lookup callee
// let f = fenv.get(id).unwrap();
// println!("f {:?}", &f);
// let parameter_names: Vec<String> =
// f.sig.1.clone().into_iter().map(|idt| idt.0).collect();
// println!("f par_names {:?}", &parameter_names);
// let mut lenv = Venv::new(); // local environment for function application
// let arg_assign: Vec<(String, Data)> =
// parameter_names.into_iter().zip(args.into_iter()).collect();
// println!("arg assignments {:?}", &arg_assign);
// for (id, val) in arg_assign {
// let addr = mem.alloc(); // get new allocation slot
// mem.Menv.insert(addr, val); // write the new value
// lenv.insert(id, addr);
// }
// println!("local enviroment {:?}", &lenv);
// println!("memory {:?}", &mem);
// // execute function, unwrap the result as we need a Constant
// eval_body(f.body.clone(), mem, &mut lenv, fenv).unwrap()
// }
// Expr::Ref(exp) => {
// println!("here");
// match *exp.to_owned() {
// Expr::Id(id) => {
// println!("id {:?}", &id);
// let addr = venv.get(&id).unwrap();
// Data::Pointer(*addr)
// }
// _ => {
// let val = eval_expr(exp, mem, venv, fenv);
// println!("-- value {:?}", &val);
// let addr = mem.alloc(); // get new allocation slot
// mem.Menv.insert(addr, val.to_owned()); // write the new value
// let ref_val = Data::Pointer(addr);
// println!(
// "Ref exp {:?} e {:?} mem {:?} venv {:?}",
// exp, val, mem, venv
// );
// ref_val
// }
// }
// }
// Expr::RefMut(exp) => {
// println!("here");
// match *exp.to_owned() {
// Expr::Id(id) => {
// println!("id {:?}", &id);
// let addr = venv.get(&id).unwrap();
// Data::Pointer(*addr)
// }
// _ => {
// let val = eval_expr(exp, mem, venv, fenv);
// println!("-- value {:?}", &val);
// let addr = mem.alloc(); // get new allocation slot
// mem.Menv.insert(addr, val.to_owned()); // write the new value
// let ref_val = Data::Pointer(addr);
// println!(
// "Ref exp {:?} e {:?} mem {:?} venv {:?}",
// exp, val, mem, venv
// );
// ref_val
// }
// }
// }
// Expr::Deref(exp) => {
// println!("-- Deref");
// let e = eval_expr(exp, mem, venv, fenv);
// println!("-- DereRef {:?} {:?}", exp, e);
// if let Data::Pointer(addr) = e {
// mem.Menv.get(&addr).unwrap().to_owned()
// } else {
// panic!("cannot deref {:?}", e);
// }
// }
// _ => unimplemented!(),
// }
// }
// pub fn eval_lvalue(exp: &Expr, mem: &mut Mem, venv: &Venv, fenv: &Fenv) -> Addr {
// println!("eval_lvalue {:?},{:?},{:?},{:?} ", exp, mem, venv, fenv);
// match exp {
// Expr::Id(id) => {
// let addr = venv.get(id).unwrap();
// println!("addr {:?}", addr);
// addr.to_owned()
// }
// Expr::Deref(exp) => {
// let lv = eval_expr(exp, mem, venv, fenv);
// println!("lv {:?}", lv);
// match eval_expr(exp, mem, venv, fenv) {
// Data::Pointer(addr) => addr,
// _ => panic!("cannot deref {:?}", exp),
// }
// }
// _ => unimplemented!(),
// }
// }
// // commands may return with a value
// // either directly (return) or
// // if inside an inner block (then/else, or while)
// pub fn menv_update(data: Data, menv: &mut Menv) {
// // match data {
// // Pointer::
// // }
// }
// pub fn dump(msg: &str, mem: &Mem, venv: &Venv) {
// println!("{:?} {:?} {:?}", msg, mem, venv);
// }
// // A return generates Some(Data), otherwise None
// pub fn eval_cmd(cmd: &Cmd, mem: &mut Mem, venv: &mut Venv, fenv: &Fenv) -> Option<Data> {
// println!("{:?}", cmd);
// match cmd {
// Cmd::Assign(lexp, rexp) => {
// let rval = eval_expr(rexp, mem, venv, fenv);
// println!("val {:?}", rval);
// let addr = eval_lvalue(lexp, mem, venv, fenv);
// // println!("lval {:?}", lval);
// // let addr = venv.get(&lval).unwrap();
// mem.Menv.insert(addr, rval);
// None
// }
// Cmd::If(exp, then_block, opt_else) => {
// if get_bool(eval_expr(exp, mem, venv, fenv)) {
// eval_body(then_block.to_vec(), mem, venv, fenv)
// } else {
// if let Some(else_block) = opt_else {
// eval_body(else_block.to_vec(), mem, venv, fenv)
// } else {
// None
// }
// }
// }
// Cmd::Let(_, id, _, exp) => {
// let val = eval_expr(exp, mem, venv, fenv);
// println!("val {:?}", val);
// let addr = mem.alloc(); // get new allocation slot
// mem.Menv.insert(addr, val); // write the new value
// venv.insert(id.to_owned(), addr);
// dump("after Let", mem, venv);
// None
// }
// Cmd::Return(exp) => {
// let v = Some(eval_expr(exp, mem, venv, fenv));
// println!("return value {:?}", v);
// v
// }
// Cmd::While(exp, body) => {
// while get_bool(eval_expr(exp, mem, venv, fenv)) {
// if let Some(retv) = eval_body(body.to_vec(), mem, venv, fenv) {
// return Some(retv);
// }
// }
// None
// }
// }
// }
// pub fn eval_body(cmds: Vec<Cmd>, mem: &mut Mem, venv: &mut Venv, fenv: &Fenv) -> Option<Data> {
// for c in &cmds {
// if let Some(ret) = eval_cmd(c, mem, venv, fenv) {
// return Some(ret);
// }
// }
// None
// }
// pub fn build_env(prog: Prog) -> (Tenv, Fenv) {
// let mut tenv = Tenv::new();
// let mut fenv = Fenv::new();
// for i in prog {
// match i {
// Item::TypeDecl(TypeDecl::Struct(id, layout)) => {
// tenv.insert(id.clone(), TypeDecl::Struct(id, layout));
// }
// Item::Function(f) => {
// fenv.insert(f.sig.0.to_owned(), f);
// }
// }
// }
// (tenv, fenv)
// }
// pub fn eval_prog(prog: &str) {
// let (unparsed, prog) = parse_prog(prog).unwrap();
// println!("prog: {:?}", prog);
// println!("unparsed: {:?}", unparsed);
// let (tenv, fenv) = check_prog(&prog);
// println!("envs {:?}", (tenv, &fenv));
// // assume main does not take any parameters
// let call_main = Expr::Application("main".to_owned(), Vec::<Expr>::new());
// let mut mem = Mem::new();
// let mut venv = Venv::new();
// let ret = eval_expr(&call_main, &mut mem, &mut venv, &fenv);
// println!("return from main = {:?}", ret);
// println!("venv = {:?}", venv);
// println!("mem = {:?}", mem);
// }
// lib // lib
pub mod ast; pub mod ast;
pub mod interpreter;
pub mod parse; pub mod parse;
...@@ -6,16 +6,20 @@ use std::slice::Iter; ...@@ -6,16 +6,20 @@ use std::slice::Iter;
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::char, character::complete::{
character::complete::{digit1, multispace0}, alpha1, alphanumeric0, char, digit1, multispace0, multispace1,
combinator::{cut, map}, },
combinator::{cut, map, opt},
error::ParseError, error::ParseError,
multi::many1, multi::{many1, separated_list},
sequence::{delimited, preceded}, sequence::{delimited, preceded, terminated, tuple},
IResult, IResult,
}; };
use crate::ast::{Expr, Op, Span, SpanExpr}; use crate::ast::{
Block, Cmd, Expr, Func, Item, Mutability, Op, Prog, Span, SpanCmd,
SpanExpr, SpanId, Type,
};
pub fn parse_i32(i: Span) -> IResult<Span, (Span, i32)> { pub fn parse_i32(i: Span) -> IResult<Span, (Span, i32)> {
map(digit1, |digit_str: Span| { map(digit1, |digit_str: Span| {
...@@ -41,15 +45,42 @@ fn parse_op(i: Span) -> IResult<Span, (Span, Op)> { ...@@ -41,15 +45,42 @@ fn parse_op(i: Span) -> IResult<Span, (Span, Op)> {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> { pub enum Token<'a> {
Num(i32), Num(i32),
Bool(bool),
Id(String),
Call(String, Vec<(Span<'a>, Vec<SpanToken<'a>>)>),
Par(Vec<SpanToken<'a>>), Par(Vec<SpanToken<'a>>),
Op(Op), Op(Op),
} }
type SpanToken<'a> = (Span<'a>, Token<'a>); type SpanToken<'a> = (Span<'a>, Token<'a>);
/// Parses an identifier, skipping any leading whitespace.
///
/// An identifier starts with one or more alphabetic characters (head)
/// followed by zero or more alphanumeric characters (tail).
///
/// The returned span covers exactly the identifier text: its fragment is
/// head and tail joined, and its position is that of the first identifier
/// character, not of the whitespace that was skipped before it.
pub fn parse_id(i: Span) -> IResult<Span, Span> {
    map(
        // `tag("")` always succeeds without consuming anything; it is used
        // only to obtain a span positioned right after the tail.
        preceded(multispace0, tuple((alpha1, alphanumeric0, tag("")))),
        |(head, _, end): (Span, Span, Span)| {
            // Start from `head` so offset and line point at the identifier
            // itself, then widen the fragment to cover head + tail by
            // slicing the original input between the two positions.
            let mut res = head;
            let start = head.offset - i.offset;
            let stop = end.offset - i.offset;
            res.fragment = &i.fragment[start..stop];
            res
        },
    )(i)
}
fn parse_terminal(i: Span) -> IResult<Span, SpanToken> { fn parse_terminal(i: Span) -> IResult<Span, SpanToken> {
alt(( alt((
map(parse_i32, |(s, v)| (s, Token::Num(v))), map(parse_i32, |(s, v)| (s, Token::Num(v))),
map(tag("true"), |s| (s, Token::Bool(true))),
map(tag("false"), |s| (s, Token::Bool(false))),
map(
tuple((
parse_id,
parse_par(separated_list(char(','), parse_tokens)),
)),
|(s, t)| (s, Token::Call(s.to_string(), t)),
),
map(parse_id, |s: Span| (s, Token::Id(s.to_string()))),
map(parse_par(parse_tokens), |(s, tokens)| { map(parse_par(parse_tokens), |(s, tokens)| {
(s, Token::Par(tokens)) (s, Token::Par(tokens))
}), }),
...@@ -71,8 +102,20 @@ fn parse_tokens(i: Span) -> IResult<Span, (Span, Vec<SpanToken>)> { ...@@ -71,8 +102,20 @@ fn parse_tokens(i: Span) -> IResult<Span, (Span, Vec<SpanToken>)> {
fn compute_atom<'a>(t: &mut Peekable<Iter<SpanToken<'a>>>) -> SpanExpr<'a> { fn compute_atom<'a>(t: &mut Peekable<Iter<SpanToken<'a>>>) -> SpanExpr<'a> {
match t.next() { match t.next() {
Some((s, Token::Num(i))) => (*s, Expr::Num(*i)), Some((s, Token::Num(i))) => (*s, Expr::Num(*i)),
Some((s, Token::Bool(b))) => (*s, Expr::Bool(*b)),
Some((s, Token::Id(id))) => (*s, Expr::Id(id.to_string())),
Some((_, Token::Par(v))) => climb(&mut v.iter().peekable(), 0), Some((_, Token::Par(v))) => climb(&mut v.iter().peekable(), 0),
Some((s, Token::Op(op))) => (*s, Expr::UnaryOp(*op, Box::new(climb(t, 4)))), // assume highest precedence Some((s, Token::Call(id, vv))) => {
//
let v: Vec<SpanExpr> = vv
.iter()
.map(|(span, t)| climb(&mut (*t).iter().peekable(), 0))
.collect();
(*s, Expr::Call(id.to_string(), v))
}
Some((s, Token::Op(op))) => {
(*s, Expr::UnaryOp(*op, Box::new(climb(t, 4))))
} // assume highest precedence
_ => panic!("error in compute atom"), _ => panic!("error in compute atom"),
} }
} }
...@@ -107,62 +150,160 @@ fn climb<'a>( ...@@ -107,62 +150,160 @@ fn climb<'a>(
result result
} }
pub fn test(s: &str, v: i32) { pub fn parse_expr(i: Span) -> IResult<Span, SpanExpr> {
match parse_tokens(Span::new(s)) { map(parse_tokens, |(_, tokens)| {
Ok((Span { fragment: "", .. }, (_, t))) => { climb(&mut tokens.iter().peekable(), 0)
let mut t = t.iter().peekable(); })(i)
println!("{:?}", &t);
let e = climb(&mut t, 0);
println!("{:?}", &e);
println!("eval {} {}", math_eval(&e), v);
assert_eq!(math_eval(&e), v);
} }
Ok((s, t)) => println!(
"parse incomplete, \n parsed tokens \t{:?}, \n remaining \t{:?}", fn parse_if(i: Span) -> IResult<Span, Cmd> {
t, s map(
preceded(
// here to avoid ambiguity with other names starting with `if`, if we added
// variables to our language, we say that if must be terminated by at least
// one whitespace character
terminated(tag("if"), multispace1),
cut(tuple((
parse_expr,
parse_block,
opt(preceded(preceded(multispace0, tag("else")), parse_block)),
))),
), ),
Err(err) => println!("{:?}", err), |(pred, true_branch, maybe_false_branch)| {
Cmd::If(pred, true_branch, maybe_false_branch)
},
)(i)
} }
pub fn parse_let(i: Span) -> IResult<Span, Cmd> {
map(
preceded(
// here to avoid ambiguity with other names starting with `let`, if we added
// variables to our language, we say that if must be terminated by at least
// one whitespace character
terminated(tag("let"), multispace1),
cut(tuple((
opt(preceded(multispace0, terminated(tag("mut"), multispace1))),
parse_id,
preceded(preceded(multispace0, tag(":")), parse_type),
preceded(preceded(multispace0, tag("=")), parse_expr),
))),
),
|(m, id, t, expr)| {
Cmd::Let(
if m.is_some() {
Mutability::Mut
} else {
Mutability::Imm
},
id,
t,
expr,
)
},
)(i)
} }
// helpers pub fn parse_assign<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> {
fn parse_par<'a, O, F, E>( map(
inner: F, // here to avoid ambiguity with other names starting with `let`, if we added
) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E> // variables to our language, we say that if must be terminated by at least
where // one whitespace character
F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>, tuple((
E: ParseError<Span<'a>>, parse_expr,
{ preceded(preceded(multispace0, tag("=")), parse_expr),
// delimited allows us to split up the input )),
// cut allwos us to consume the input (and prevent backtracking) |(id_expr, expr)| Cmd::Assign(id_expr, expr),
delimited(char('('), preceded(multispace0, inner), cut(char(')'))) )(i)
} }
fn math_eval(e: &SpanExpr) -> i32 { pub fn parse_return(i: Span) -> IResult<Span, Cmd> {
match e.clone().1 { map(
Expr::Num(i) => i, preceded(terminated(tag("return"), multispace1), parse_expr),
Expr::BinOp(op, l, r) => { |expr| Cmd::Return(expr),
let lv = math_eval(&l); )(i)
let rv = math_eval(&r); }
match op {
Op::Add => lv + rv, pub fn parse_while(i: Span) -> IResult<Span, Cmd> {
Op::Sub => lv - rv, map(
Op::Mul => lv * rv, preceded(
Op::Div => lv / rv, // here to avoid ambiguity with other names starting with `let`, if we added
Op::Pow => lv.pow(rv as u32), // variables to our language, we say that if must be terminated by at least
_ => unimplemented!(), // one whitespace character
terminated(tag("while"), multispace1),
cut(tuple((parse_expr, parse_block))),
),
|(pred, body)| Cmd::While(pred, body),
)(i)
} }
// pub fn parse_cmd<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> {
pub fn parse_cmd(i: Span) -> IResult<Span, Cmd> {
preceded(
multispace0,
//parse_assign,
alt((parse_while, parse_let, parse_if, parse_assign, parse_return)),
)(i)
} }
Expr::UnaryOp(op, e) => {
let e = math_eval(&e); pub fn parse_block(i: Span) -> IResult<Span, Block> {
match op { preceded(multispace0, parse_sem(separated_list(tag(";"), parse_cmd)))(i)
Op::Add => e, }
Op::Sub => -e,
_ => unimplemented!(), pub fn parse_type(i: Span) -> IResult<Span, Type> {
preceded(
multispace0,
alt((
map(tag("i32"), |_| Type::I32),
map(tag("bool"), |_| Type::Bool),
map(preceded(tag("&"), parse_type), |t| Type::Ref(Box::new(t))),
map(
preceded(terminated(tag("mut"), multispace1), parse_type),
|t| Type::Mut(Box::new(t)),
),
)),
)(i)
} }
pub fn parse_par_decls(i: Span) -> IResult<Span, Vec<(SpanId, Type)>> {
parse_par(separated_list(tag(","), parse_par_decl))(i)
} }
_ => unimplemented!(),
pub fn parse_par_decl(i: Span) -> IResult<Span, (SpanId, Type)> {
map(
tuple((
opt(preceded(multispace0, terminated(tag("mut"), multispace1))),
parse_id,
preceded(multispace0, tag(":")),
parse_type,
)),
|(b, id, _, t)| (id, t),
)(i)
}
pub fn parse_function_decl(i: Span) -> IResult<Span, Func> {
map(
preceded(
preceded(multispace0, terminated(tag("fn"), multispace1)),
tuple((
parse_id,
parse_par(separated_list(tag(","), parse_par_decl)),
opt(preceded(
preceded(multispace0, terminated(tag("->"), multispace1)),
parse_type,
)),
parse_block,
)),
),
|(id, par, ret, body)| Func {
sig: (id, par, ret.unwrap_or(Type::Unit)),
body: body,
},
)(i)
} }
pub fn parse_prog(i: Span) -> IResult<Span, Prog> {
separated_list(multispace0, map(parse_function_decl, |f| Item::Func(f)))(i)
} }
#[derive(Debug, Copy, Clone, PartialEq)] #[derive(Debug, Copy, Clone, PartialEq)]
...@@ -181,3 +322,36 @@ fn get_prec(op: &Op) -> (u8, Ass) { ...@@ -181,3 +322,36 @@ fn get_prec(op: &Op) -> (u8, Ass) {
_ => unimplemented!(), _ => unimplemented!(),
} }
} }
// helpers
/// Builds a parser that runs `inner` between `(` and `)`.
///
/// Whitespace is skipped after the opening parenthesis and before the
/// closing one. The closing parenthesis is wrapped in `cut`, so once the
/// opening parenthesis and `inner` have matched, a missing `)` prevents
/// backtracking.
fn parse_par<'a, O, F, E>(
    inner: F,
) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
where
    F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
    E: ParseError<Span<'a>>,
{
    let open = char('(');
    let body = preceded(multispace0, inner);
    let close = cut(preceded(multispace0, char(')')));

    // Keep only the output of `body`; the delimiters are discarded.
    delimited(open, body, close)
}
/// Builds a parser that runs `inner` between `{` and `}`.
///
/// Whitespace is skipped after the opening brace and before the closing
/// one. The closing brace is wrapped in `cut`, so once the opening brace and
/// `inner` have matched, a missing `}` prevents backtracking.
fn parse_sem<'a, O, F, E>(
    inner: F,
) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
where
    F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
    E: ParseError<Span<'a>>,
{
    let open = char('{');
    let body = preceded(multispace0, inner);
    let close = cut(preceded(multispace0, char('}')));

    // Keep only the output of `body`; the delimiters are discarded.
    delimited(open, body, close)
}