From e5a735d5beec2abb008793b94173cfa25aab2b13 Mon Sep 17 00:00:00 2001 From: Per Lindgren <per.lindgren@ltu.se> Date: Fri, 20 Sep 2019 22:40:02 +0200 Subject: [PATCH] wip (span working) --- examples/climb.rs | 6 +- examples/main_locate.rs | 3 +- examples/main_span_expr_custom_err.rs | 6 +- examples/tmp.rs | 20 +- src/ast.rs | 60 ++--- src/lib.rs | 2 +- src/parse.rs | 344 +++++++++++--------------- 7 files changed, 195 insertions(+), 246 deletions(-) diff --git a/examples/climb.rs b/examples/climb.rs index de59ff6..0b2ff34 100644 --- a/examples/climb.rs +++ b/examples/climb.rs @@ -138,7 +138,7 @@ fn test(s: &str, v: i32) { } fn main() { - test("- -1 + + 1", - -1 + 1); // rust does not allow + as a unary op (I do ;) + test("- -1 + + 1", --1 + 1); // rust does not allow + as a unary op (I do ;) test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3); // just to check that right associative works (you don't need to implement pow) test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1); @@ -149,9 +149,7 @@ fn main() { } // helpers -fn parse_par<'a, O, F, E>( - inner: F, -) -> impl Fn(&'a str) -> IResult<&'a str, O, E> +fn parse_par<'a, O, F, E>(inner: F) -> impl Fn(&'a str) -> IResult<&'a str, O, E> where F: Fn(&'a str) -> IResult<&'a str, O, E>, E: ParseError<&'a str>, diff --git a/examples/main_locate.rs b/examples/main_locate.rs index fdf884f..dca10e8 100644 --- a/examples/main_locate.rs +++ b/examples/main_locate.rs @@ -26,7 +26,7 @@ pub enum Expr<'a> { BinOp(Span<'a>, Box<Expr<'a>>, Op, Box<Expr<'a>>), } -// this is the definititon of IResult +// this is the definititon of IResult // type IResult<I, O, E = u32> = Result<(I, O), Err<I, E>>; pub fn parse_i32(i: Span) -> IResult<Span, Expr> { @@ -81,7 +81,6 @@ fn test_parse_i32_1() { ); } - fn main() { let (a, b) = parse_expr(Span::new("1")).unwrap(); println!("{:?}", parse_expr(Span::new("1"))); diff --git a/examples/main_span_expr_custom_err.rs b/examples/main_span_expr_custom_err.rs index f8a3b58..12f013c 100644 --- 
a/examples/main_span_expr_custom_err.rs +++ b/examples/main_span_expr_custom_err.rs @@ -61,7 +61,11 @@ pub fn parse_i32<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> { let (i, digits) = digit1(i)?; match digits.fragment.parse() { Ok(int) => Ok((i, (digits, Expr::Num(int)))), - Err(e) => Err(Err::Failure(Error(i, Some(digits), ErrorKind::ParseIntError(e)))), + Err(e) => Err(Err::Failure(Error( + i, + Some(digits), + ErrorKind::ParseIntError(e), + ))), } } diff --git a/examples/tmp.rs b/examples/tmp.rs index 3a1280c..79fbb74 100644 --- a/examples/tmp.rs +++ b/examples/tmp.rs @@ -1,7 +1,7 @@ use crust::{ ast::Span, - parse::{parse_assign, parse_expr}, - interpreter::{eval_expr} + interpreter::eval_expr, + parse::{parse_assign, parse_expr, parse_prog}, }; fn test(s: &str, v: i32) { @@ -19,7 +19,7 @@ fn test(s: &str, v: i32) { } } -fn main() { +fn test_expr() { // test("- -1 + + 1", - -1 + 1); // rust does not allow + as a unary op (I do ;) // test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3); // // just to check that right associative works (you don't need to implement pow) @@ -28,6 +28,16 @@ fn main() { // test("1*2+3", 1 * 2 + 3); // // just to check that we get a parse error // test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2); - test("1 + (1 - 2)", 1 + (1 - 2) ); - println!("{:?}", parse_assign(Span::new("3 = a(1, 2+3)"))); + test("1 + (1 - 2)", 1 + (1 - 2)); +} + +fn main() { + // println!("{:?}", parse_assign(Span::new("3 = a(1, 2+3)"))); + println!( + "{:?}", + parse_prog(Span::new( + " + fn main() { let a:i32 = 1} " + )) + ); } diff --git a/src/ast.rs b/src/ast.rs index f445e1c..2d8a71c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -20,6 +20,8 @@ pub enum Op { type SpanOp<'a> = (Span<'a>, Op); +pub type SpanId<'a> = Span<'a>; + #[derive(Debug, Clone, PartialEq)] pub enum Expr<'a> { Num(i32), @@ -36,14 +38,14 @@ pub type SpanExpr<'a> = (Span<'a>, Expr<'a>); #[derive(Debug, PartialEq, Clone)] pub enum Cmd<'a> { // let <mut> id : <& <mut>>Type = expr - 
Let(SpanMut<'a>, String, SpanType<'a>, SpanExpr<'a>), + Let(Mutability, SpanId<'a>, Type, SpanExpr<'a>), // id = expr Assign(SpanExpr<'a>, SpanExpr<'a>), // if predicate do-this, and optionally do-that) - If(SpanExpr<'a>, SpanBlock<'a>, Option<SpanBlock<'a>>), - // /// while predicate do-this - // While(Expr, Block), - // Return(Expr), + If(SpanExpr<'a>, Block<'a>, Option<Block<'a>>), + // while predicate do-this + While(SpanExpr<'a>, Block<'a>), + Return(SpanExpr<'a>), } pub type SpanCmd<'a> = (Span<'a>, Cmd<'a>); @@ -53,52 +55,32 @@ pub enum Mutability { Imm, Mut, } - pub type SpanMut<'a> = (Span<'a>, Mutability); -// #[derive(Debug, PartialEq, Clone)] -// pub enum Cmd { -// /// let <mut> id : <& <mut>>Type = expr -// Let(Mutability, String, Type, Expr), -// /// id = expr -// Assign(Expr, Expr), -// /// if predicate do-this, and optionally do-that) -// If(Expr, Block, Option<Block>), -// /// while predicate do-this -// While(Expr, Block), -// Return(Expr), -// } - pub type SpanBlock<'a> = (Span<'a>, Vec<SpanCmd<'a>>); pub type Block<'a> = Vec<Cmd<'a>>; #[derive(Debug, PartialEq, Clone)] -pub enum Type<'a> { +pub enum Type { I32, Bool, Unit, - Mut(Box<SpanType<'a>>), - Ref(Box<SpanType<'a>>), + Mut(Box<Type>), + Ref(Box<Type>), // no structs } -pub type SpanType<'a> = (Span<'a>, Type<'a>); - -// #[derive(Debug, PartialEq, Clone)] -// pub enum TypeDecl { -// Struct(String, Vec<(String, Type)>), -// } +// pub type SpanType<'a> = (Span<'a>, Type<'a>); -// #[derive(Debug, PartialEq, Clone)] -// pub struct Function { -// pub sig: (String, Vec<(String, Type)>, Type), -// pub body: Block, -// } +#[derive(Debug, PartialEq, Clone)] +pub struct Func<'a> { + pub sig: (SpanId<'a>, Vec<(SpanId<'a>, Type)>, Type), + pub body: Block<'a>, +} -// #[derive(Debug, PartialEq, Clone)] -// pub enum Item { -// TypeDecl(TypeDecl), -// Function(Function), -// } +#[derive(Debug, PartialEq, Clone)] +pub enum Item<'a> { + Func(Func<'a>), +} -// pub type Prog = Vec<Item>; +pub type Prog<'a> = 
Vec<Item<'a>>; diff --git a/src/lib.rs b/src/lib.rs index 56c2ccd..94134f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ // lib pub mod ast; -pub mod parse; pub mod interpreter; +pub mod parse; diff --git a/src/parse.rs b/src/parse.rs index 66e9823..50897e9 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -6,7 +6,7 @@ use std::slice::Iter; use nom::{ branch::alt, bytes::complete::tag, - character::complete::{alpha1, char, digit1, multispace0, multispace1}, + character::complete::{alpha1, alphanumeric0, char, digit1, multispace0, multispace1}, combinator::{cut, map, opt}, error::ParseError, multi::{many1, separated_list}, @@ -14,7 +14,9 @@ use nom::{ IResult, }; -use crate::ast::{Cmd, Expr, Op, Span, SpanCmd, SpanExpr, Block}; +use crate::ast::{ + Block, Cmd, Expr, Func, Item, Mutability, Op, Prog, Span, SpanCmd, SpanExpr, SpanId, Type, +}; pub fn parse_i32(i: Span) -> IResult<Span, (Span, i32)> { map(digit1, |digit_str: Span| { @@ -49,16 +51,26 @@ pub enum Token<'a> { type SpanToken<'a> = (Span<'a>, Token<'a>); +pub fn parse_id(i: Span) -> IResult<Span, Span> { + // an identifier needs to start with one or more alphas (head) + // followed by zero or more alphanumerics (tail) + map( + preceded(multispace0, tuple((alpha1, alphanumeric0))), + // we concatenate the head and tail into a single String + |(_, _)| i, // head.to_string() + &tail.to_string(), + )(i) +} + fn parse_terminal(i: Span) -> IResult<Span, SpanToken> { alt(( map(parse_i32, |(s, v)| (s, Token::Num(v))), map(tag("true"), |s| (s, Token::Bool(true))), map(tag("false"), |s| (s, Token::Bool(false))), map( - tuple((alpha1, parse_par(separated_list(char(','), parse_tokens)))), + tuple((parse_id, parse_par(separated_list(char(','), parse_tokens)))), |(s, t)| (s, Token::Call(s.to_string(), t)), ), - map(alpha1, |s: Span| (s, Token::Id(s.to_string()))), + map(parse_id, |s: Span| (s, Token::Id(s.to_string()))), map(parse_par(parse_tokens), |(s, tokens)| { (s, Token::Par(tokens)) }), @@ -129,78 +141,52 
@@ pub fn parse_expr(i: Span) -> IResult<Span, SpanExpr> { })(i) } -// fn parse_if(i: Span) -> IResult<Span, SpanCmd> { -// map( -// preceded( -// // here to avoid ambiguity with other names starting with `if`, if we added -// // variables to our language, we say that if must be terminated by at least -// // one whitespace character -// terminated(tag("if"), multispace1), -// cut(tuple(( -// parse_expr, -// parse_block, -// opt(preceded(preceded(multispace0, tag("else")), parse_block)), -// ))), -// ), -// |(pred, true_branch, maybe_false_branch)| { -// Cmd::If(pred, true_branch, maybe_false_branch) -// }, -// )(i) -// } - -// helpers -fn parse_par<'a, O, F, E>(inner: F) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E> -where - F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>, - E: ParseError<Span<'a>>, -{ - // delimited allows us to split up the input - // cut allwos us to consume the input (and prevent backtracking) - delimited(char('('), preceded(multispace0, inner), cut(char(')'))) +fn parse_if(i: Span) -> IResult<Span, Cmd> { + map( + preceded( + // here to avoid ambiguity with other names starting with `if`, if we added + // variables to our language, we say that if must be terminated by at least + // one whitespace character + terminated(tag("if"), multispace1), + cut(tuple(( + parse_expr, + parse_block, + opt(preceded(preceded(multispace0, tag("else")), parse_block)), + ))), + ), + |(pred, true_branch, maybe_false_branch)| Cmd::If(pred, true_branch, maybe_false_branch), + )(i) } -fn parse_sem<'a, O, F, E>(inner: F) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E> -where - F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>, - E: ParseError<Span<'a>>, -{ - // delimited allows us to split up the input - // cut allwos us to consume the input (and prevent backtracking) - delimited(char('{'), preceded(multispace0, inner), cut(char('}'))) +pub fn parse_let(i: Span) -> IResult<Span, Cmd> { + map( + preceded( + // here to avoid ambiguity with other names starting with `let`, if we 
added + // variables to our language, we say that if must be terminated by at least + // one whitespace character + terminated(tag("let"), multispace1), + cut(tuple(( + opt(preceded(multispace0, terminated(tag("mut"), multispace1))), + parse_id, + preceded(preceded(multispace0, tag(":")), parse_type), + preceded(preceded(multispace0, tag("=")), parse_expr), + ))), + ), + |(m, id, t, expr)| { + Cmd::Let( + if m.is_some() { + Mutability::Mut + } else { + Mutability::Imm + }, + id, + t, + expr, + ) + }, + )(i) } -// pub fn parse_let(i: &str) -> IResult<&str, Cmd, VerboseError<&str>> { -// context( -// "let expression", -// map( -// preceded( -// // here to avoid ambiguity with other names starting with `let`, if we added -// // variables to our language, we say that if must be terminated by at least -// // one whitespace character -// terminated(tag("let"), multispace1), -// cut(tuple(( -// opt(preceded(multispace0, terminated(tag("mut"), multispace1))), -// parse_id, -// preceded(preceded(multispace0, tag(":")), parse_type), -// preceded(preceded(multispace0, tag("=")), parse_expr), -// ))), -// ), -// |(m, id, t, expr)| { -// Cmd::Let( -// if m.is_some() { -// Mutability::Mut -// } else { -// Mutability::Imm -// }, -// id, -// t, -// expr, -// ) -// }, -// ), -// )(i) -// } - pub fn parse_assign<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> { map( // here to avoid ambiguity with other names starting with `let`, if we added @@ -214,38 +200,32 @@ pub fn parse_assign<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> { )(i) } -// pub fn parse_return(i: &str) -> IResult<&str, Cmd, VerboseError<&str>> { -// context( -// "assign", -// map( -// preceded(terminated(tag("return"), multispace1), parse_expr), -// |expr| Cmd::Return(expr), -// ), -// )(i) -// } +pub fn parse_return(i: Span) -> IResult<Span, Cmd> { + map( + preceded(terminated(tag("return"), multispace1), parse_expr), + |expr| Cmd::Return(expr), + )(i) +} -// pub fn parse_while(i: &str) -> IResult<&str, Cmd, 
VerboseError<&str>> { -// context( -// "while expression", -// map( -// preceded( -// // here to avoid ambiguity with other names starting with `let`, if we added -// // variables to our language, we say that if must be terminated by at least -// // one whitespace character -// terminated(tag("while"), multispace1), -// cut(tuple((parse_expr, parse_block))), -// ), -// |(pred, body)| Cmd::While(pred, body), -// ), -// )(i) -// } +pub fn parse_while(i: Span) -> IResult<Span, Cmd> { + map( + preceded( + // here to avoid ambiguity with other names starting with `let`, if we added + // variables to our language, we say that if must be terminated by at least + // one whitespace character + terminated(tag("while"), multispace1), + cut(tuple((parse_expr, parse_block))), + ), + |(pred, body)| Cmd::While(pred, body), + )(i) +} // pub fn parse_cmd<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> { pub fn parse_cmd(i: Span) -> IResult<Span, Cmd> { preceded( multispace0, - parse_assign, - // alt((parse_while, parse_let, parse_if, parse_assign, parse_return)), + //parse_assign, + alt((parse_while, parse_let, parse_if, parse_assign, parse_return)), )(i) } @@ -253,106 +233,61 @@ pub fn parse_block(i: Span) -> IResult<Span, Block> { preceded(multispace0, parse_sem(separated_list(tag(";"), parse_cmd)))(i) } -// fn s_cmd<'a, O, F>(inner: F) -> impl Fn(&'a str) -> IResult<&'a str, O, VerboseError<&'a str>> -// where -// F: Fn(&'a str) -> IResult<&'a str, O, VerboseError<&'a str>>, -// { -// // delimited allows us to split up the input -// // cut allows us to consume the input (and prevent backtracking) -// preceded( -// multispace0, -// delimited( -// char('{'), -// preceded(multispace0, inner), -// context( -// "closing curly bracket", -// cut(preceded(multispace0, char('}'))), -// ), -// ), -// ) -// } - -// pub fn parse_type(i: &str) -> IResult<&str, Type, VerboseError<&str>> { -// preceded( -// multispace0, -// alt(( -// map(tag("i32"), |_| Type::I32), -// map(tag("bool"), |_| 
Type::Bool), -// map(preceded(tag("&"), parse_type), |t| Type::Ref(Box::new(t))), -// map( -// preceded(terminated(tag("mut"), multispace1), parse_type), -// |t| Type::Mut(Box::new(t)), -// ), -// )), -// )(i) -// } - -// pub fn parse_field_decl(i: &str) -> IResult<&str, (String, Type), VerboseError<&str>> { -// map( -// tuple((parse_id, preceded(multispace0, tag(":")), parse_type)), -// |(l, _, r)| (l, r), -// )(i) -// } - -// pub fn parse_field_decls(i: &str) -> IResult<&str, Vec<(String, Type)>, VerboseError<&str>> { -// s_cmd(separated_list(tag(","), parse_field_decl))(i) -// } - -// // pub fn parse_par_decls(i: &str) -> IResult<&str, Vec<(String, Type)>, VerboseError<&str>> { -// // s_cmd(separated_list(tag(","), parse_par_decl))(i) -// // } +pub fn parse_type(i: Span) -> IResult<Span, Type> { + preceded( + multispace0, + alt(( + map(tag("i32"), |_| Type::I32), + map(tag("bool"), |_| Type::Bool), + map(preceded(tag("&"), parse_type), |t| Type::Ref(Box::new(t))), + map( + preceded(terminated(tag("mut"), multispace1), parse_type), + |t| Type::Mut(Box::new(t)), + ), + )), + )(i) +} -// pub fn parse_par_decl(i: &str) -> IResult<&str, (String, Type), VerboseError<&str>> { -// map( -// tuple(( -// opt(preceded(multispace0, terminated(tag("mut"), multispace1))), -// parse_id, -// preceded(multispace0, tag(":")), -// parse_type, -// )), -// |(b, id, _, t)| (id, t), -// )(i) -// } +pub fn parse_par_decls(i: Span) -> IResult<Span, Vec<(SpanId, Type)>> { + parse_par(separated_list(tag(","), parse_par_decl))(i) +} -// pub fn parse_type_decl(i: &str) -> IResult<&str, TypeDecl, VerboseError<&str>> { -// preceded( -// preceded(multispace0, terminated(tag("struct"), multispace1)), -// map(tuple((parse_id, parse_field_decls)), |(id, fields)| { -// TypeDecl::Struct(id, fields) -// }), -// )(i) -// } +pub fn parse_par_decl(i: Span) -> IResult<Span, (SpanId, Type)> { + map( + tuple(( + opt(preceded(multispace0, terminated(tag("mut"), multispace1))), + parse_id, + 
preceded(multispace0, tag(":")), + parse_type, + )), + |(b, id, _, t)| (id, t), + )(i) +} -// pub fn parse_function_decl(i: &str) -> IResult<&str, Function, VerboseError<&str>> { -// map( -// preceded( -// preceded(multispace0, terminated(tag("fn"), multispace1)), -// tuple(( -// parse_id, -// s_exp(separated_list(tag(","), parse_par_decl)), -// opt(preceded( -// preceded(multispace0, terminated(tag("->"), multispace1)), -// parse_type, -// )), -// parse_block, -// )), -// ), -// |(id, par, ret, body)| Function { -// sig: (id, par, ret.unwrap_or(Type::Unit)), -// body: body, -// }, -// )(i) -// } +pub fn parse_function_decl(i: Span) -> IResult<Span, Func> { + map( + preceded( + preceded(multispace0, terminated(tag("fn"), multispace1)), + tuple(( + parse_id, + parse_par(separated_list(tag(","), parse_par_decl)), + opt(preceded( + preceded(multispace0, terminated(tag("->"), multispace1)), + parse_type, + )), + parse_block, + )), + ), + |(id, par, ret, body)| Func { + sig: (id, par, ret.unwrap_or(Type::Unit)), + body: body, + }, + )(i) +} -// pub fn parse_prog(i: &str) -> IResult<&str, Prog, VerboseError<&str>> { -// separated_list( -// multispace0, -// alt(( -// map(parse_function_decl, |f| Item::Function(f)), -// map(parse_type_decl, |t| Item::TypeDecl(t)), -// )), -// )(i) -// } +pub fn parse_prog(i: Span) -> IResult<Span, Prog> { + separated_list(multispace0, map(parse_function_decl, |f| Item::Func(f)))(i) +} #[derive(Debug, Copy, Clone, PartialEq)] enum Ass { @@ -370,3 +305,24 @@ fn get_prec(op: &Op) -> (u8, Ass) { _ => unimplemented!(), } } + +// helpers +fn parse_par<'a, O, F, E>(inner: F) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E> +where + F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>, + E: ParseError<Span<'a>>, +{ + // delimited allows us to split up the input + // cut allwos us to consume the input (and prevent backtracking) + delimited(char('('), preceded(multispace0, inner), cut(char(')'))) +} + +fn parse_sem<'a, O, F, E>(inner: F) -> impl Fn(Span<'a>) 
-> IResult<Span<'a>, O, E> +where + F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>, + E: ParseError<Span<'a>>, +{ + // delimited allows us to split up the input + // cut allows us to consume the input (and prevent backtracking) + delimited(char('{'), preceded(multispace0, inner), cut(char('}'))) +} -- GitLab