Skip to content
Snippets Groups Projects
Select Git revision
  • b2c59e2a7f66495723fd53f0d70fb34470448a75
  • master default protected
  • cro
  • v0.1.0
4 results

parse.rs

Blame
  • parse.rs 9.99 KiB
    extern crate nom;
    
    use std::iter::Peekable;
    use std::slice::Iter;
    
    use nom::{
        branch::alt,
        bytes::complete::tag,
        character::complete::{alpha1, alphanumeric0, char, digit1, multispace0, multispace1},
        combinator::{cut, map, opt},
        error::ParseError,
        multi::{many1, separated_list},
        sequence::{delimited, preceded, terminated, tuple},
        IResult,
    };
    
    use crate::ast::{
        Block, Cmd, Expr, Func, Item, Mutability, Op, Prog, Span, SpanCmd, SpanExpr, SpanId, Type,
    };
    
    pub fn parse_i32(i: Span) -> IResult<Span, (Span, i32)> {
        map(digit1, |digit_str: Span| {
            (digit_str, digit_str.fragment.parse::<i32>().unwrap())
        })(i)
    }
    
    fn parse_op(i: Span) -> IResult<Span, (Span, Op)> {
        alt((
            map(tag("=="), |s| (s, Op::Eq)),
            map(tag("!="), |s| (s, Op::Neq)),
            map(tag("**"), |s| (s, Op::Pow)),
            map(tag("&&"), |s| (s, Op::And)),
            map(tag("||"), |s| (s, Op::Or)),
            map(tag("+"), |s| (s, Op::Add)),
            map(tag("-"), |s| (s, Op::Sub)),
            map(tag("*"), |s| (s, Op::Mul)),
            map(tag("/"), |s| (s, Op::Div)),
            map(tag("!"), |s| (s, Op::Not)),
        ))(i)
    }
    
    #[derive(Debug, Clone, PartialEq)]
    pub enum Token<'a> {
        Num(i32),
        Bool(bool),
        Id(String),
        Call(String, Vec<(Span<'a>, Vec<SpanToken<'a>>)>),
        Par(Vec<SpanToken<'a>>),
        Op(Op),
    }
    
    type SpanToken<'a> = (Span<'a>, Token<'a>);
    
    pub fn parse_id(i: Span) -> IResult<Span, Span> {
        // an identifier needs to start with one or more alphas (head)
        // followed by zero or more alphanumerics (tail)
        map(
            preceded(multispace0, tuple((alpha1, alphanumeric0))),
            // we concatenate the head and tail into a single String
            |(_, _)| i, // head.to_string() + &tail.to_string(),
        )(i)
    }
    
    fn parse_terminal(i: Span) -> IResult<Span, SpanToken> {
        alt((
            map(parse_i32, |(s, v)| (s, Token::Num(v))),
            map(tag("true"), |s| (s, Token::Bool(true))),
            map(tag("false"), |s| (s, Token::Bool(false))),
            map(
                tuple((parse_id, parse_par(separated_list(char(','), parse_tokens)))),
                |(s, t)| (s, Token::Call(s.to_string(), t)),
            ),
            map(parse_id, |s: Span| (s, Token::Id(s.to_string()))),
            map(parse_par(parse_tokens), |(s, tokens)| {
                (s, Token::Par(tokens))
            }),
        ))(i)
    }
    
    fn parse_token(i: Span) -> IResult<Span, SpanToken> {
        preceded(
            multispace0,
            alt((map(parse_op, |(s, op)| (s, Token::Op(op))), parse_terminal)),
        )(i)
    }
    
    // I think the outer span is wrong
    fn parse_tokens(i: Span) -> IResult<Span, (Span, Vec<SpanToken>)> {
        map(many1(parse_token), |tokens| (i, tokens))(i)
    }
    
    fn compute_atom<'a>(t: &mut Peekable<Iter<SpanToken<'a>>>) -> SpanExpr<'a> {
        match t.next() {
            Some((s, Token::Num(i))) => (*s, Expr::Num(*i)),
            Some((s, Token::Bool(b))) => (*s, Expr::Bool(*b)),
            Some((s, Token::Id(id))) => (*s, Expr::Id(id.to_string())),
            Some((_, Token::Par(v))) => climb(&mut v.iter().peekable(), 0),
            Some((s, Token::Call(id, vv))) => {
                //
                let v: Vec<SpanExpr> = vv
                    .iter()
                    .map(|(span, t)| climb(&mut (*t).iter().peekable(), 0))
                    .collect();
                (*s, Expr::Call(id.to_string(), v))
            }
            Some((s, Token::Op(op))) => (*s, Expr::UnaryOp(*op, Box::new(climb(t, 4)))), // assume highest precedence
            _ => panic!("error in compute atom"),
        }
    }
    
    fn climb<'a>(t: &mut Peekable<Iter<SpanToken<'a>>>, min_prec: u8) -> SpanExpr<'a> {
        let mut result: SpanExpr = compute_atom(t);
    
        loop {
            match t.peek() {
                Some((s, Token::Op(op))) => {
                    let (prec, ass) = get_prec(op);
                    if prec < min_prec {
                        break;
                    };
                    let next_prec = prec
                        + match ass {
                            Ass::Left => 1,
                            _ => 0,
                        };
                    t.next();
                    let rhs = climb(t, next_prec);
                    result = (*s, Expr::BinOp(*op, Box::new(result), Box::new(rhs)))
                }
                _ => {
                    break;
                }
            }
        }
        result
    }
    
    pub fn parse_expr(i: Span) -> IResult<Span, SpanExpr> {
        map(parse_tokens, |(_, tokens)| {
            climb(&mut tokens.iter().peekable(), 0)
        })(i)
    }
    
    fn parse_if(i: Span) -> IResult<Span, Cmd> {
        map(
            preceded(
                // here to avoid ambiguity with other names starting with `if`, if we added
                // variables to our language, we say that if must be terminated by at least
                // one whitespace character
                terminated(tag("if"), multispace1),
                cut(tuple((
                    parse_expr,
                    parse_block,
                    opt(preceded(preceded(multispace0, tag("else")), parse_block)),
                ))),
            ),
            |(pred, true_branch, maybe_false_branch)| Cmd::If(pred, true_branch, maybe_false_branch),
        )(i)
    }
    
    pub fn parse_let(i: Span) -> IResult<Span, Cmd> {
        map(
            preceded(
                // here to avoid ambiguity with other names starting with `let`, if we added
                // variables to our language, we say that if must be terminated by at least
                // one whitespace character
                terminated(tag("let"), multispace1),
                cut(tuple((
                    opt(preceded(multispace0, terminated(tag("mut"), multispace1))),
                    parse_id,
                    preceded(preceded(multispace0, tag(":")), parse_type),
                    preceded(preceded(multispace0, tag("=")), parse_expr),
                ))),
            ),
            |(m, id, t, expr)| {
                Cmd::Let(
                    if m.is_some() {
                        Mutability::Mut
                    } else {
                        Mutability::Imm
                    },
                    id,
                    t,
                    expr,
                )
            },
        )(i)
    }
    
    pub fn parse_assign<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> {
        map(
            // here to avoid ambiguity with other names starting with `let`, if we added
            // variables to our language, we say that if must be terminated by at least
            // one whitespace character
            tuple((
                parse_expr,
                preceded(preceded(multispace0, tag("=")), parse_expr),
            )),
            |(id_expr, expr)| Cmd::Assign(id_expr, expr),
        )(i)
    }
    
    pub fn parse_return(i: Span) -> IResult<Span, Cmd> {
        map(
            preceded(terminated(tag("return"), multispace1), parse_expr),
            |expr| Cmd::Return(expr),
        )(i)
    }
    
    pub fn parse_while(i: Span) -> IResult<Span, Cmd> {
        map(
            preceded(
                // here to avoid ambiguity with other names starting with `let`, if we added
                // variables to our language, we say that if must be terminated by at least
                // one whitespace character
                terminated(tag("while"), multispace1),
                cut(tuple((parse_expr, parse_block))),
            ),
            |(pred, body)| Cmd::While(pred, body),
        )(i)
    }
    
    // pub fn parse_cmd<'a>(i: Span<'a>) -> IResult<Span<'a>, Cmd<'a>> {
    pub fn parse_cmd(i: Span) -> IResult<Span, Cmd> {
        preceded(
            multispace0,
            //parse_assign,
            alt((parse_while, parse_let, parse_if, parse_assign, parse_return)),
        )(i)
    }
    
    pub fn parse_block(i: Span) -> IResult<Span, Block> {
        preceded(multispace0, parse_sem(separated_list(tag(";"), parse_cmd)))(i)
    }
    
    pub fn parse_type(i: Span) -> IResult<Span, Type> {
        preceded(
            multispace0,
            alt((
                map(tag("i32"), |_| Type::I32),
                map(tag("bool"), |_| Type::Bool),
                map(preceded(tag("&"), parse_type), |t| Type::Ref(Box::new(t))),
                map(
                    preceded(terminated(tag("mut"), multispace1), parse_type),
                    |t| Type::Mut(Box::new(t)),
                ),
            )),
        )(i)
    }
    
    pub fn parse_par_decls(i: Span) -> IResult<Span, Vec<(SpanId, Type)>> {
        parse_par(separated_list(tag(","), parse_par_decl))(i)
    }
    
    pub fn parse_par_decl(i: Span) -> IResult<Span, (SpanId, Type)> {
        map(
            tuple((
                opt(preceded(multispace0, terminated(tag("mut"), multispace1))),
                parse_id,
                preceded(multispace0, tag(":")),
                parse_type,
            )),
            |(b, id, _, t)| (id, t),
        )(i)
    }
    
    pub fn parse_function_decl(i: Span) -> IResult<Span, Func> {
        map(
            preceded(
                preceded(multispace0, terminated(tag("fn"), multispace1)),
                tuple((
                    parse_id,
                    parse_par(separated_list(tag(","), parse_par_decl)),
                    opt(preceded(
                        preceded(multispace0, terminated(tag("->"), multispace1)),
                        parse_type,
                    )),
                    parse_block,
                )),
            ),
            |(id, par, ret, body)| Func {
                sig: (id, par, ret.unwrap_or(Type::Unit)),
                body: body,
            },
        )(i)
    }
    
    pub fn parse_prog(i: Span) -> IResult<Span, Prog> {
        separated_list(multispace0, map(parse_function_decl, |f| Item::Func(f)))(i)
    }
    
    #[derive(Debug, Copy, Clone, PartialEq)]
    enum Ass {
        Left,
        Right,
    }
    
    fn get_prec(op: &Op) -> (u8, Ass) {
        match op {
            Op::Add => (1, Ass::Left),
            Op::Sub => (1, Ass::Left),
            Op::Mul => (2, Ass::Left),
            Op::Div => (2, Ass::Left),
            Op::Pow => (3, Ass::Right),
            _ => unimplemented!(),
        }
    }
    
    // helpers
    fn parse_par<'a, O, F, E>(inner: F) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
    where
        F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
        E: ParseError<Span<'a>>,
    {
        // delimited allows us to split up the input
        // cut allwos us to consume the input (and prevent backtracking)
        delimited(char('('), preceded(multispace0, inner), cut(char(')')))
    }
    
    fn parse_sem<'a, O, F, E>(inner: F) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
    where
        F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
        E: ParseError<Span<'a>>,
    {
        // delimited allows us to split up the input
        // cut allwos us to consume the input (and prevent backtracking)
        delimited(char('{'), preceded(multispace0, inner), cut(char('}')))
    }