Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • home_exam
  • master
  • wip
3 results

Target

Select target project
  • mikala3/d7050e
  • 97gushan/d7050e
  • Abrikot/d7050e
  • Hammarkvast/d7050e
  • banunkers/d7050e
  • markhakansson/d7050e
  • inaule-6/d7050e
  • pln/d7050e
  • widforss/d7050e
  • arostr-5/d7050e
  • Grumme2/d7050e
  • brathen/d7050e
12 results
Select Git revision
  • home_exam
  • master
2 results
Show changes
Commits on Source (25)
......@@ -4,11 +4,64 @@
"version": "2.0.0",
"tasks": [
{
"type": "cargo",
"subcommand": "build",
"type": "shell",
"label": "cargo check --example main_span_expr",
"command": "cargo check --example main_span_expr",
"problemMatcher": [
"$rustc"
]
],
"group": {
"kind": "build",
"isDefault": true
}
},
{
"type": "shell",
"label": "cargo check --example main_span_expr_custom_err",
"command": "cargo check --example main_span_expr_custom_err",
"problemMatcher": [
"$rustc"
],
"group": {
"kind": "build",
"isDefault": true
}
},
{
"type": "shell",
"label": "cargo run --example precedence",
"command": "cargo run --example precedence",
"problemMatcher": [
"$rustc"
],
"group": {
"kind": "build",
"isDefault": true
}
},
{
"type": "shell",
"label": "cargo run --example main2",
"command": "cargo run --example main2",
"problemMatcher": [
"$rustc"
],
"group": {
"kind": "build",
"isDefault": true
}
},
{
"type": "shell",
"label": "cargo run --example tmp",
"command": "cargo run --example tmp",
"problemMatcher": [
"$rustc"
],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}
\ No newline at end of file
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "bytecount"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "first"
name = "crust"
version = "0.1.0"
dependencies = [
"nom 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"nom_locate 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
......@@ -39,6 +45,16 @@ dependencies = [
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nom_locate"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
......@@ -98,10 +114,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b0f90c979adde96d19eb10eb6431ba0c441e2f9e9bdff868b2f6f5114ff519"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum nom 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c618b63422da4401283884e6668d39f819a106ef51f5f59b81add00075da35ca"
"checksum nom_locate 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f932834fd8e391fc7710e2ba17e8f9f8645d846b55aa63207e17e110a1e1ce35"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
......
[package]
name = "first"
name = "crust"
version = "0.1.0"
authors = ["Per Lindgren <per.lindgren@ltu.se>"]
edition = "2018"
......@@ -8,3 +8,4 @@ edition = "2018"
[dependencies]
nom = "5.0.1"
nom_locate = "1.0.0"
# Repo for the D7050E course 2019
The repo will be updated throughout the course and includes a draft outline of the course and hints towards reaching the learning goals.
## Course Aim
Fundamental theories about computation and different models of computation. Construction of compilers. Lexical analysis, syntax analysis, and translation into abstract syntax. Regular expressions and grammars, context-free languages and grammars, lexer and parser generators. Identifier handling and symbol table organization. Type-checking, logical inference systems. Intermediate representations and transformations for different languages. Code optimization and register allocation. Machine code generation for common architectures.
In the course you will learn and develop your skills through hands-on implementation work, building your own compiler from scratch. In this way theoretical aspects such as formal grammars, Structural Operational Semantics (SOS), and type rule formalisations become tangible. We will even touch upon memory safety and how guarantees can be achieved through static (compile time) borrow checking. Compiler backend (code optimization etc.) will be discussed in context of LLVM, which you will optionally interface as a library for code generation.
## Draft outline
### W1 The big picture, parsing, semantic analysis, code generation.
Practical assignment:
- Define a minimal subset of Rust, including
- Function definitions
- Commands (let, assignment, if then (else), while)
- Expressions (including function calls)
- Primitive types (boolean, i32) and their literals
- Explicit types everywhere
- Explicit return(s)
- Begin writing a parser for expressions in Rust using `nom` (parser combinator library)
### W2 Formal languages and Structural Operational Semantics
Theory:
- Regular expressions and automata
- EBNF
- Structural Operational Semantics
Practical assignment:
- Formulate an EBNF for your language
- Continue on the parser implementation (you may use other tools)
### W3 Context Free Grammars, Push Down Automata and Type Checking
Theory:
- DFA/NFA (regular expressions)
- Push Down Automata (PDA) for Context Free Grammars (CFG)
- Typing Rules and their Derivations
Practical assignment:
- Formulate SOS rules for your language
- Finish parser
- Implement interpreter. Panic! on run-time error.
### W4 Parsing strategies, Mutability and Memory References
Theory:
- Parsing strategies, pros and cons. LL(1), LALR, Parsing Expression Grammars (PEG), etc.
- Mutability and memory references
Practical assignment
- Formalize type rules for your language (optional)
- Start to implement type checker
- Extend parser/AST/interpreter to support `&` and `&mut`. Panic! on run-time error.
### W5 Borrow checking
Theory:
- Linear types and memory safety
- The Rust borrow model
Practical assignment
- Finish type checker. (A program passing type checking should never run into panics in the interpreter due to type errors.)
- Start to implement borrow checker
### W6 LLVM
Theory:
- SSA form
- Concept of `unique`
- Code optimization techniques (performed by LLVM)
- LLVM API (a minimal subset)
Practical assignment
- Borrow checker implementation.
- Optional. Use LLVM as library for code generation.
### W7 Wrapping it up
Practical assignment
- Compiler harness (cli interface)
- Finish work on the compiler
### W8 Home Exam
You will get the home exam to work on the last weeks of the course. This may imply further theoretical exercises and experiments on your compiler.
### Examination
You will each be scheduled 30 minutes to present Your home exam to Jingsen and me, based on which Your grade will be determined. Schedule will be agreed on later using Doodle.
## Files
In this repo you find some examples using `nom` to parse expressions.
- main.rs
Simple recursive descent parsing.
- main*
Shows different approaches to introduce location information and custom error types.
- examples/aron.rs
- examples/climb.rs
Shows two approaches to do precedence climbing.
---
## Your parser
- You may implement your parser using any tool of choice.
- You are NOT required to account for operator precedence in expressions, however you MUST support parenthesized sub expressions. (+ for precedence towards higher grades)
- You are NOT required to account for location information, but your error messages will be better if you do. (+ for spans, towards higher grades)
- Error recovery is NOT required (+ for recovery towards higher grades)
## Your interpreter
- Your interpreter should be able to correctly execute programs according to your SOS.
- Your interpreter should panic (with an appropriate error message) when encountering an evaluation error (e.g., 1 + false)
## Your type checker
- Your type checker should reject ill-typed programs according to your typing rules.
- (+ for higher grades)
- span information in type errors
- multiple error reporting
- type inference (relaxing explicit typing where possible)
## Your borrow checker
- Your borrow checker should reject borrow errors according to lexical scoping
- (+ for higher grades)
- Non Lexical Lifetimes (likely hard)
## Your LLVM bindings (Optional)
Implement for higher grades
- Basic code generation.
- Pass `noalias` where possible allowing for better optimization (assuming your borrowchecker prevents aliasing).
- Other attributes, intrinsics, etc. that enables further LLVM optimizations.
\ No newline at end of file
use nom::{branch, bytes::complete::tag, character::complete::digit1, error, Err};
use nom_locate::LocatedSpan;
const INPUT: &str = "-2+3**2*3/5-4";
//const INPUT: &str = "2+-3";
//const INPUT: &str = "2+30000000000000000000000";
//const INPUT: &str = "2";
//const INPUT: &str = "30000000000000000000000";
//const INPUT: &str = "3+2a";
// Table of prefix (unary) operators that `tag_unary` searches.
const UNARYS: [Funcmap; 1] = [Funcmap {
keyword: "-",
prec: 4,
ass: Ass::Right,
func: Function::UnSub,
}];
// Table of infix (binary) operators that `tag_infix` searches.
// `tag_func` tries the entries in array order, so "**" is listed before "*";
// otherwise the "*" entry would match the first character of "**".
const INFIXS: [Funcmap; 5] = [
Funcmap {
keyword: "**",
prec: 3,
ass: Ass::Right,
func: Function::Pow,
},
Funcmap {
keyword: "*",
prec: 2,
ass: Ass::Left,
func: Function::Mult,
},
Funcmap {
keyword: "/",
prec: 2,
ass: Ass::Left,
func: Function::Div,
},
Funcmap {
keyword: "+",
prec: 1,
ass: Ass::Left,
func: Function::Add,
},
Funcmap {
keyword: "-",
prec: 1,
ass: Ass::Left,
func: Function::Sub,
},
];
// A node of the parsed expression tree together with the source span it was built from.
struct Expr<'a> {
// For integers this is the digit span; for operators it is the operator keyword span.
span: Span<'a>,
val: Value<'a>,
}
// Payload of an `Expr`: an integer literal, a unary application or a binary application.
enum Value<'a> {
Int(i32),
// Operator and its single operand.
UnFunc(Function, Box<Expr<'a>>),
// Operator, left operand, right operand.
Func(Function, Box<Expr<'a>>, Box<Expr<'a>>),
}
// Operators known to the parser; `UnSub` is the unary minus, the rest are infix.
#[derive(Clone, Copy)]
enum Function {
UnSub,
Pow,
Mult,
Div,
Add,
Sub,
}
// One operator-table entry: the source keyword plus its precedence, associativity and meaning.
struct Funcmap {
keyword: &'static str,
// Larger values bind tighter (compared against `min_prec` in `parse_infix_left`).
prec: u8,
ass: Ass,
func: Function,
}
// Operator associativity.
enum Ass {
Left,
Right,
}
type Span<'a> = LocatedSpan<&'a str>;
type SpanFuncmap<'a> = (Span<'a>, &'a Funcmap);
type IResult<'a, I, O, E = Error<'a>> = Result<(I, O), Err<E>>;
// Parses `INPUT` and prints either the simplified tree or a located error message.
fn main() {
match parse(Span::new(INPUT)) {
Ok(tree) => {
println!("{:#?}", SimpleExpr::new(&tree));
}
// Errors that carry the offending span are reported with line and column.
Err(Error {
val: Some(val),
error,
..
}) => println!(
"{:#?} at line {}, column {}:\n\t{}",
error.description(),
val.line,
val.get_utf8_column(),
val.fragment,
),
// Errors without a span are handed to `panic!` as the payload.
// NOTE(review): a non-string `panic!` payload is not accepted by newer editions — confirm before bumping `edition`.
Err(err) => panic!(err),
}
}
fn parse<'a>(input: Span) -> Result<Expr, Error> {
match parse_expr(input) {
Ok((Span { fragment: "", .. }, tree)) => Ok(tree),
Ok((input, _)) => Err(Error {
input: input,
val: Some(input),
error: ErrorKind::NotRecognised,
}),
Err(Err::Incomplete(_)) => Err(Error {
input: input,
val: Some(input),
error: ErrorKind::Incomplete,
}),
Err(Err::Error(err)) => Err(err),
Err(Err::Failure(err)) => Err(err),
}
}
// Parses any expression: an infix expression first, otherwise a non-infix one.
fn parse_expr<'a>(input: Span) -> IResult<Span, Expr> {
branch::alt((parse_infix, parse_expr_nobin))(input)
}
// Parses an expression that is not an infix application: a unary application or a plain value.
fn parse_expr_nobin<'a>(input: Span) -> IResult<Span, Expr> {
branch::alt((parse_unary, parse_expr_nobin_noun))(input)
}
// Parses an expression that is neither an infix nor a unary application (currently only a value).
fn parse_expr_nobin_noun<'a>(input: Span) -> IResult<Span, Expr> {
parse_value(input)
}
// Parses a literal value; at present this only delegates to `parse_number`.
fn parse_value(input: Span) -> IResult<Span, Expr> {
// More primitive type categories go here (branch::alt() if > 1):
parse_number(input)
}
// Parses a numeric literal; at present this only delegates to `parse_int`.
fn parse_number(input: Span) -> IResult<Span, Expr> {
// More number types go here (branch::alt() if > 1):
parse_int(input)
}
fn parse_int(input: Span) -> IResult<Span, Expr> {
let (input, digits) = digit1(input)?;
let int = match digits.fragment.parse() {
Ok(int) => int,
Err(err) => {
return Err(Err::Failure(Error {
input,
val: Some(digits),
error: ErrorKind::ParseInt(err),
}))
}
};
Ok((
input,
Expr {
span: digits,
val: Value::Int(int),
},
))
}
/// Parses a unary operator followed by its operand.
///
/// The operand is any non-infix expression, so unary operators may be stacked.
/// The resulting node's span is the operator keyword.
fn parse_unary(input: Span) -> IResult<Span, Expr> {
    let (after_op, (op_span, op)) = tag_unary(input)?;
    let (rest, operand) = parse_expr_nobin(after_op)?;
    let node = Expr {
        span: op_span,
        val: Value::UnFunc(op.func, Box::new(operand)),
    };
    Ok((rest, node))
}
// Entry point of the precedence-climbing parser for infix expressions.
fn parse_infix(input: Span) -> IResult<Span, Expr> {
// Initialize with minimum precedence 1.
parse_infix_first(input, 1)
}
// Parses the leftmost operand (unary operators allowed) and then any infix
// operators of precedence >= `min_prec` that follow it.
fn parse_infix_first<'a>(input: Span, min_prec: u8) -> IResult<Span, Expr> {
// First, find left hand side expression. Search for everything but BinOps.
let (input, left) = parse_expr_nobin(input)?;
parse_infix_left(input, min_prec, left)
}
// Parses a right-hand-side operand and the infix operators of precedence
// >= `min_prec` that follow it. Used for operands that come after an infix operator.
fn parse_infix_prec<'a>(input: Span, min_prec: u8) -> IResult<Span, Expr> {
// Almost identical to parse_infix_first(), but we do not accept unary ops.
let (input, left) = parse_expr_nobin_noun(input)?;
parse_infix_left(input, min_prec, left)
}
// Precedence-climbing step: given an already parsed `left` operand, consumes
// following infix operators whose precedence is at least `min_prec` and folds
// them into a tree.
//
// `input_bin` is the input positioned right after `left`. Returns the remaining
// input and the combined expression; if no acceptable operator follows, `left`
// is returned unchanged with nothing consumed.
fn parse_infix_left<'a>(
input_bin: Span<'a>,
min_prec: u8,
left: Expr<'a>,
) -> IResult<'a, Span<'a>, Expr<'a>> {
// See if we can find an infix function. If not, return what we have.
let (input, (span, func_map)) = match tag_infix(input_bin) {
Ok(res) => res,
// A recoverable error means "no operator here"; failures are propagated.
Err(Err::Error(Error { input, .. })) => return Ok((input, left)),
Err(err) => return Err(err),
};
// Does the new infix fulfill our precedence criteria?
if func_map.prec < min_prec {
// Hand back the input from before the operator so the caller can consume it.
return Ok((input_bin, left));
}
// Parse the right-hand-side.
// Left-associative operators require a strictly tighter operator on the right;
// right-associative ones accept the same precedence again.
let new_min_prec = match func_map.ass {
Ass::Left => func_map.prec + 1,
Ass::Right => func_map.prec,
};
let (input, right) = parse_infix_prec(input, new_min_prec)?;
// Put together the infix function with arguments. Continue forward.
let expr = Expr {
span,
val: Value::Func(func_map.func, Box::new(left), Box::new(right)),
};
parse_infix_left(input, min_prec, expr)
}
// Recognises one of the unary operator keywords listed in `UNARYS`.
fn tag_unary(input: Span) -> IResult<Span, SpanFuncmap> {
tag_func(input, &UNARYS)
}
// Recognises one of the infix operator keywords listed in `INFIXS`.
fn tag_infix(input: Span) -> IResult<Span, SpanFuncmap> {
tag_func(input, &INFIXS)
}
fn tag_func<'a>(input: Span<'a>, funcs: &'a [Funcmap]) -> IResult<'a, Span<'a>, SpanFuncmap<'a>> {
for func_map in funcs.iter() {
match tag(func_map.keyword)(input) {
Ok((input, span)) => return Ok((input, (span, &func_map))),
Err(Err::Error(_)) => (),
Err(err) => return Err(err),
}
}
Err(Err::Error(Error {
input,
val: None,
error: ErrorKind::Nom(error::ErrorKind::Tag),
}))
}
// Span-free view of an `Expr` tree, used for debug printing in `main`.
#[derive(Debug)]
enum SimpleExpr<'a> {
Int(&'a i32),
UnSub(Box<SimpleExpr<'a>>),
Pow(Box<SimpleExpr<'a>>, Box<SimpleExpr<'a>>),
Mult(Box<SimpleExpr<'a>>, Box<SimpleExpr<'a>>),
Div(Box<SimpleExpr<'a>>, Box<SimpleExpr<'a>>),
Add(Box<SimpleExpr<'a>>, Box<SimpleExpr<'a>>),
Sub(Box<SimpleExpr<'a>>, Box<SimpleExpr<'a>>),
}
impl<'a> SimpleExpr<'a> {
fn new(tree: &'a Expr) -> Self {
match tree {
Expr {
val: Value::Int(int),
..
} => SimpleExpr::Int(&int),
Expr {
val: Value::UnFunc(Function::UnSub, x),
..
} => SimpleExpr::UnSub(Box::new(SimpleExpr::new(x))),
Expr {
val: Value::Func(Function::Pow, x, y),
..
} => SimpleExpr::Pow(Box::new(SimpleExpr::new(x)), Box::new(SimpleExpr::new(y))),
Expr {
val: Value::Func(Function::Mult, x, y),
..
} => SimpleExpr::Mult(Box::new(SimpleExpr::new(x)), Box::new(SimpleExpr::new(y))),
Expr {
val: Value::Func(Function::Div, x, y),
..
} => SimpleExpr::Div(Box::new(SimpleExpr::new(x)), Box::new(SimpleExpr::new(y))),
Expr {
val: Value::Func(Function::Add, x, y),
..
} => SimpleExpr::Add(Box::new(SimpleExpr::new(x)), Box::new(SimpleExpr::new(y))),
Expr {
val: Value::Func(Function::Sub, x, y),
..
} => SimpleExpr::Sub(Box::new(SimpleExpr::new(x)), Box::new(SimpleExpr::new(y))),
_ => panic!(),
}
}
}
// Lets nom's built-in combinators produce this crate's `Error` type.
impl<'a> error::ParseError<Span<'a>> for Error<'a> {
// Wraps a nom error kind; no offending value span is known at this point.
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
Error {
input,
val: None,
error: ErrorKind::Nom(kind),
}
}
// Keeps the existing error and discards the additional context.
fn append(_: Span<'a>, _: error::ErrorKind, other: Self) -> Self {
other
}
}
// Parse error with location information.
struct Error<'a> {
// Input position associated with the error.
input: Span<'a>,
// Span of the offending value, when one is known.
val: Option<Span<'a>>,
error: ErrorKind,
}
// Categories of parse errors reported by this example.
enum ErrorKind {
// Input was left over after a successful parse (see `parse`).
NotRecognised,
// The parser reported `Err::Incomplete` (see `parse`).
Incomplete,
// A digit run could not be converted to an integer (see `parse_int`).
ParseInt(std::num::ParseIntError),
// An error kind produced by a nom combinator.
Nom(error::ErrorKind),
}
impl ErrorKind {
fn description(&self) -> String {
match self {
ErrorKind::NotRecognised => String::from("Failed to parse input."),
ErrorKind::Incomplete => String::from("There was not enough data."),
ErrorKind::ParseInt(err) => format!("Parse Int Error ({})", &err),
ErrorKind::Nom(err) => String::from(err.description()),
}
}
}
extern crate nom;
use std::iter::Peekable;
use std::slice::Iter;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::char,
character::complete::{digit1, multispace0},
combinator::{cut, map},
error::ParseError,
multi::many1,
sequence::{delimited, preceded},
IResult,
};
// Operators recognised by `parse_op`.
// Only Add, Sub, Mul, Div and Pow have a precedence in `get_prec`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Op {
Eq,
Neq,
And,
Or,
Add,
Sub,
Mul,
Div,
Pow,
Not,
}
// Expression tree produced by `climb`.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
Num(i32),
Par(Box<Expr>),
// Identifier
// Function application
BinOp(Op, Box<Expr>, Box<Expr>),
UnaryOp(Op, Box<Expr>),
}
pub fn parse_i32(i: &str) -> IResult<&str, i32> {
map(digit1, |digit_str: &str| digit_str.parse::<i32>().unwrap())(i)
}
// Recognises a single operator token.
// Two-character operators are listed before the one-character operators that
// are their prefixes ("**" before "*", "!=" before "!").
// NOTE(review): this relies on `alt` trying its alternatives in order — confirm against the nom docs.
fn parse_op(i: &str) -> IResult<&str, Op> {
alt((
map(tag("=="), |_| Op::Eq),
map(tag("!="), |_| Op::Neq),
map(tag("**"), |_| Op::Pow),
map(tag("&&"), |_| Op::And),
map(tag("||"), |_| Op::Or),
map(tag("+"), |_| Op::Add),
map(tag("-"), |_| Op::Sub),
map(tag("*"), |_| Op::Mul),
map(tag("/"), |_| Op::Div),
map(tag("!"), |_| Op::Not),
))(i)
}
// Flat token produced by the tokenizer; a parenthesised group nests its own token list.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
Num(i32),
Par(Vec<Token>),
Op(Op),
}
/// Parses a terminal token: an integer literal or a parenthesised token group.
fn parse_terminal(i: &str) -> IResult<&str, Token> {
    let number = map(parse_i32, Token::Num);
    let group = map(parse_par(parse_tokens), Token::Par);
    alt((number, group))(i)
}
/// Skips leading whitespace, then parses one token: an operator or a terminal.
fn parse_token(i: &str) -> IResult<&str, Token> {
    let operator = map(parse_op, Token::Op);
    preceded(multispace0, alt((operator, parse_terminal)))(i)
}
// Tokenizes the input into one or more tokens.
fn parse_tokens(i: &str) -> IResult<&str, Vec<Token>> {
many1(parse_token)(i)
}
// Consumes one atom from the token stream: a number, a parenthesised group
// (climbed recursively on its own token list) or a prefix operator with its operand.
// Panics if the stream is exhausted.
fn compute_atom(t: &mut Peekable<Iter<Token>>) -> Expr {
match t.next() {
Some(Token::Num(i)) => Expr::Num(*i),
Some(Token::Par(v)) => climb(&mut v.iter().peekable(), 0),
// Precedence 4 is above every binary precedence returned by `get_prec`.
Some(Token::Op(op)) => Expr::UnaryOp(*op, Box::new(climb(t, 4))), // assume highest precedence
_ => panic!("error in compute atom"),
}
}
/// Precedence climbing over a token stream.
///
/// Parses one atom, then folds in binary operators as long as the next token
/// is an operator with precedence of at least `min_prec`. Stops at the first
/// non-operator token, a too-weak operator, or the end of the stream.
fn climb(t: &mut Peekable<Iter<Token>>, min_prec: u8) -> Expr {
    let mut result = compute_atom(t);
    while let Some(Token::Op(op)) = t.peek() {
        let (prec, ass) = get_prec(op);
        if prec < min_prec {
            break;
        }
        // A left-associative operator needs a strictly tighter operator on its right.
        let next_prec = if ass == Ass::Left { prec + 1 } else { prec };
        t.next();
        let rhs = climb(t, next_prec);
        result = Expr::BinOp(*op, Box::new(result), Box::new(rhs));
    }
    result
}
// Tokenizes and parses `s`, evaluates it and asserts that the result equals `v`.
// A tokenizer error or left-over input is only printed; it does not panic.
fn test(s: &str, v: i32) {
match parse_tokens(s) {
// Entire input was tokenized.
Ok(("", t)) => {
let mut t = t.iter().peekable();
println!("{:?}", &t);
let e = climb(&mut t, 0);
println!("{:?}", &e);
println!("eval {} {}", math_eval(&e), v);
assert_eq!(math_eval(&e), v);
}
// Tokenizer stopped early: report what was parsed and what remains.
Ok((s, t)) => println!(
"parse incomplete, \n parsed tokens \t{:?}, \n remaining \t{:?}",
t, s
),
Err(err) => println!("{:?}", err),
}
}
// Runs the example expressions, comparing each against the value computed by Rust itself.
fn main() {
test("- -1 + + 1", - -1 + 1); // rust does not allow + as a unary op (I do ;)
test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3);
// just to check that right associative works (you don't need to implement pow)
test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1);
test("(12*2)/3-4", (12 * 2) / 3 - 4);
test("1*2+3", 1 * 2 + 3);
// just to check that we get a parse error
test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2);
}
// helpers
// Builds a parser for `inner` enclosed in parentheses; whitespace after '(' is skipped.
fn parse_par<'a, O, F, E>(
inner: F,
) -> impl Fn(&'a str) -> IResult<&'a str, O, E>
where
F: Fn(&'a str) -> IResult<&'a str, O, E>,
E: ParseError<&'a str>,
{
// delimited allows us to split up the input
// cut allows us to consume the input (and prevent backtracking)
delimited(char('('), preceded(multispace0, inner), cut(char(')')))
}
/// Evaluates an arithmetic expression tree to an `i32`.
///
/// Supports the binary operators Add, Sub, Mul, Div and Pow and the unary
/// operators Add and Sub. Operands are evaluated before the operator is
/// inspected; any other operator, and `Expr::Par`, hits `unimplemented!()`.
fn math_eval(e: &Expr) -> i32 {
    match e {
        Expr::Num(i) => *i,
        Expr::BinOp(op, l, r) => match (op, math_eval(l), math_eval(r)) {
            (Op::Add, lv, rv) => lv + rv,
            (Op::Sub, lv, rv) => lv - rv,
            (Op::Mul, lv, rv) => lv * rv,
            (Op::Div, lv, rv) => lv / rv,
            (Op::Pow, lv, rv) => lv.pow(rv as u32),
            _ => unimplemented!(),
        },
        Expr::UnaryOp(op, inner) => match (op, math_eval(inner)) {
            (Op::Add, value) => value,
            (Op::Sub, value) => -value,
            _ => unimplemented!(),
        },
        Expr::Par(_) => unimplemented!(),
    }
}
// Operator associativity, as returned by `get_prec`.
#[derive(Debug, Copy, Clone, PartialEq)]
enum Ass {
Left,
Right,
}
/// Returns the precedence (higher binds tighter) and associativity of a binary operator.
///
/// Operators other than Add, Sub, Mul, Div and Pow hit `unimplemented!()`.
fn get_prec(op: &Op) -> (u8, Ass) {
    match op {
        Op::Add | Op::Sub => (1, Ass::Left),
        Op::Mul | Op::Div => (2, Ass::Left),
        Op::Pow => (3, Ass::Right),
        _ => unimplemented!(),
    }
}
extern crate nom;
use nom::combinator::map_res;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, multispace0},
combinator::map,
error::{context, VerboseError, VerboseErrorKind},
sequence::{preceded, tuple},
IResult,
};
// Binary operators supported by this minimal example.
#[derive(Debug, PartialEq)]
pub enum Op {
Add,
}
// Expression tree: an integer literal or a binary operation (left, operator, right).
#[derive(Debug, PartialEq)]
pub enum Expr {
Num(i32),
BinOp(Box<Expr>, Op, Box<Expr>),
}
pub fn parse_i32(i: &str) -> IResult<&str, Expr> {
map(digit1, |digit_str: &str| {
Expr::Num(digit_str.parse::<i32>().unwrap())
})(i)
}
// Parses `<int> + <expr>` or a single `<int>`, skipping leading whitespace.
// The right operand is parsed recursively, so "1+2+3" nests as 1+(2+3).
fn parse_expr(input: &str) -> IResult<&str, Expr> {
preceded(
multispace0,
alt((
map(
tuple((parse_i32, preceded(multispace0, tag("+")), parse_expr)),
|(l, _, r)| Expr::BinOp(Box::new(l), Op::Add, Box::new(r)),
),
// Fallback when no "+ <expr>" follows the integer.
parse_i32,
)),
)(input)
}
// cargo test
// A single integer literal parses to `Expr::Num` and consumes the whole input.
#[test]
fn test_parse_i32_1() {
    let res = parse_expr("1");
    assert_eq!(res, Ok(("", Expr::Num(1))));
}
// Trailing non-digit input is left unconsumed: the integer parses and "a" remains.
#[test]
fn test_parse_i32_2() {
    assert_eq!(parse_expr("1a"), Ok(("a", Expr::Num(1))));
}
// Prints the raw parse results for a few sample inputs, including an
// over-long literal that does not fit in an i32.
fn main() {
println!("{:?}", parse_expr("1"));
println!("{:?}", parse_expr("1+2 + 3"));
println!("{:?}", parse_expr(" 1+ 1a"));
println!("{:?}", parse_expr("11111111111111111111111111"));
}
extern crate nom;
use nom::combinator::map_res;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, multispace0},
combinator::map,
error::{context, ErrorKind, VerboseError, VerboseErrorKind},
sequence::{preceded, tuple},
Err, IResult,
};
use nom_locate::{position, LocatedSpan};
type Span<'a> = LocatedSpan<&'a str>;
// Binary operators supported by this example.
#[derive(Debug, PartialEq)]
pub enum Op {
Add,
}
// Expression tree in which every node carries a source span as its first field.
#[derive(Debug, PartialEq)]
pub enum Expr<'a> {
Num(Span<'a>, i32),
BinOp(Span<'a>, Box<Expr<'a>>, Op, Box<Expr<'a>>),
}
// this is the definition of IResult
// type IResult<I, O, E = u32> = Result<(I, O), Err<I, E>>;
pub fn parse_i32(i: Span) -> IResult<Span, Expr> {
map(digit1, |digit_str: Span| {
Expr::Num(digit_str, digit_str.fragment.parse::<i32>().unwrap())
})(i)
// below is just an exapmle on howto generate an Error explicitly
// Err(Err::Error((i, ErrorKind::Alpha)))
}
// Parses `<int> + <expr>` or a single `<int>`, skipping leading whitespace.
// A `BinOp` node is tagged with `i`, the span at which this call started
// (before the leading whitespace is skipped).
fn parse_expr(i: Span) -> IResult<Span, Expr> {
preceded(
multispace0,
alt((
map(
tuple((parse_i32, preceded(multispace0, tag("+")), parse_expr)),
|(l, _, r)| Expr::BinOp(i, Box::new(l), Op::Add, Box::new(r)),
),
// Fallback when no "+ <expr>" follows the integer.
parse_i32,
)),
)(i)
}
// cargo test
// Parsing "1" must consume the whole input and yield `Num` with the span of the digit.
#[test]
fn test_parse_i32_1() {
let (rest, expr) = parse_expr(Span::new("1")).unwrap();
// check that we are at the end of the input
assert_eq!(
rest,
Span {
offset: 1,
line: 1,
fragment: "",
extra: (),
},
);
// check that the expression is parsed correctly
assert_eq!(
expr,
Expr::Num(
Span {
offset: 0,
line: 1,
fragment: "1",
extra: (),
},
1
)
);
}
/// Prints the raw parse results (including spans) for a few sample inputs.
fn main() {
    // The previously unused `let (a, b) = ...unwrap()` binding was dropped;
    // the same input is printed on the next line.
    println!("{:?}", parse_expr(Span::new("1")));
    println!("{:?}", parse_expr(Span::new("1+2 + 3")));
    println!("{:?}", parse_expr(Span::new(" 1+ 1a")));
    println!("{:?}", parse_expr(Span::new("11111111111111111111111111")));
}
extern crate nom;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, multispace0},
combinator::map,
sequence::{preceded, tuple},
IResult,
};
use nom_locate::LocatedSpan;
type Span<'a> = LocatedSpan<&'a str>;
// Binary operators supported by this example.
#[derive(Debug, PartialEq)]
pub enum Op {
Add,
Sub,
}
type SpanOp<'a> = (Span<'a>, Op);
// Parses "+" or "-" and returns the operator together with the span of its symbol.
fn parse_op(i: Span) -> IResult<Span, SpanOp> {
alt((
map(tag("+"), |s| (s, Op::Add)),
map(tag("-"), |s| (s, Op::Sub)),
))(i)
}
// Expression tree; operands and the operator each carry their own span.
#[derive(Debug, PartialEq)]
pub enum Expr<'a> {
Num(i32),
BinOp(Box<SpanExpr<'a>>, SpanOp<'a>, Box<SpanExpr<'a>>),
}
type SpanExpr<'a> = (Span<'a>, Expr<'a>);
pub fn parse_i32(i: Span) -> IResult<Span, SpanExpr> {
map(digit1, |digit_str: Span| {
(
digit_str,
Expr::Num(digit_str.fragment.parse::<i32>().unwrap()),
)
})(i)
}
// Parses `<int> <op> <expr>` or a single `<int>`; does not skip leading whitespace itself.
// A `BinOp` is paired with `i`, the span at which this expression starts.
fn parse_expr(i: Span) -> IResult<Span, SpanExpr> {
alt((
map(
tuple((parse_i32, preceded(multispace0, parse_op), parse_expr_ms)),
|(l, op, r)| (i, Expr::BinOp(Box::new(l), op, Box::new(r))),
),
// Fallback when no operator follows the integer.
parse_i32,
))(i)
}
// Skips leading whitespace, then parses an expression.
fn parse_expr_ms(i: Span) -> IResult<Span, SpanExpr> {
preceded(multispace0, parse_expr)(i)
}
// dumps a Span into a String
/// Formats a span as `[line :<line>, col:<column>, <fragment>]`.
fn dump_span(s: &Span) -> String {
    let (line, column, text) = (s.line, s.get_column(), s.fragment);
    format!("[line :{:?}, col:{:?}, {:?}]", line, column, text)
}
// dumps a SpanExpr into a String
/// Pretty-prints a spanned expression: numbers as their span, binary
/// operations as `<left op right>` with the operator shown as its span.
fn dump_expr(se: &SpanExpr) -> String {
    match &se.1 {
        Expr::Num(_) => dump_span(&se.0),
        Expr::BinOp(lhs, (op_span, _), rhs) => {
            let left = dump_expr(lhs);
            let operator = dump_span(op_span);
            let right = dump_expr(rhs);
            format!("<{} {} {}>", left, operator, right)
        }
    }
}
// Parses a multi-line sample expression and prints its span, raw tree and pretty form.
// Panics (via `unwrap`) if the sample fails to parse.
fn main() {
let (_, (s, e)) = parse_expr_ms(Span::new("\n\n 1+2 - \n3")).unwrap();
println!(
"span for the whole,expression: {:?}, \nline: {:?}, \ncolumn: {:?}",
s,
s.line,
s.get_column()
);
println!("raw e: {:?}", &e);
println!("pretty e: {}", dump_expr(&(s, e)));
}
// In this example, `parse_expr_ms` is the "top" level parser.
// It consumes white spaces, allowing the location information to reflect the exact
// positions in the input file.
//
// The dump_expr will create a pretty printing of the expression with spans for
// each terminal. This will be useful for later for precise type error reporting.
//
// The extra field is not used, it can be used for metadata, such as filename.
extern crate nom;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, multispace0},
combinator::map,
error,
sequence::{preceded, tuple},
Err,
};
use nom_locate::LocatedSpan;
type Span<'a> = LocatedSpan<&'a str>;
// Custom parse error: (input position, span of the offending value if known, error kind).
#[derive(Debug)]
pub struct Error<'a>(Span<'a>, Option<Span<'a>>, ErrorKind);
type IResult<'a, I, O, E = Error<'a>> = Result<(I, O), Err<E>>;
// Lets nom's built-in combinators produce the custom `Error` type.
impl<'a> error::ParseError<Span<'a>> for Error<'a> {
// Wraps a nom error kind; no offending value span is known at this point.
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
Error(input, None, ErrorKind::Nom(kind))
}
// Keeps the existing error and discards the additional context.
fn append(_: Span<'a>, _: error::ErrorKind, other: Self) -> Self {
other
}
}
// Error categories: a failed integer conversion or an error kind from a nom combinator.
#[derive(Debug)]
enum ErrorKind {
ParseIntError(std::num::ParseIntError),
Nom(error::ErrorKind),
}
// Binary operators supported by this example.
#[derive(Debug, PartialEq)]
pub enum Op {
Add,
Sub,
}
type SpanOp<'a> = (Span<'a>, Op);
// Parses "+" or "-" and returns the operator together with the span of its symbol.
fn parse_op(i: Span) -> IResult<Span, SpanOp> {
alt((
map(tag("+"), |s| (s, Op::Add)),
map(tag("-"), |s| (s, Op::Sub)),
))(i)
}
// Expression tree; operands and the operator each carry their own span.
#[derive(Debug, PartialEq)]
pub enum Expr<'a> {
Num(i32),
BinOp(Box<SpanExpr<'a>>, SpanOp<'a>, Box<SpanExpr<'a>>),
}
type SpanExpr<'a> = (Span<'a>, Expr<'a>);
pub fn parse_i32<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> {
let (i, digits) = digit1(i)?;
match digits.fragment.parse() {
Ok(int) => Ok((i, (digits, Expr::Num(int)))),
Err(e) => Err(Err::Failure(Error(i, Some(digits), ErrorKind::ParseIntError(e)))),
}
}
// Parses `<int> <op> <expr>` or a single `<int>`; does not skip leading whitespace itself.
// A `BinOp` is paired with `i`, the span at which this expression starts.
fn parse_expr(i: Span) -> IResult<Span, SpanExpr> {
alt((
map(
tuple((parse_i32, preceded(multispace0, parse_op), parse_expr_ms)),
|(l, op, r)| (i, Expr::BinOp(Box::new(l), op, Box::new(r))),
),
// Fallback when no operator follows the integer.
parse_i32,
))(i)
}
// Skips leading whitespace, then parses an expression.
fn parse_expr_ms(i: Span) -> IResult<Span, SpanExpr> {
preceded(multispace0, parse_expr)(i)
}
// dumps a Span into a String
/// Formats a span as `[line :<line>, col:<column>, <fragment>]`.
fn dump_span(s: &Span) -> String {
    let (line, column, text) = (s.line, s.get_column(), s.fragment);
    format!("[line :{:?}, col:{:?}, {:?}]", line, column, text)
}
// dumps a SpanExpr into a String
/// Pretty-prints a spanned expression: numbers as their span, binary
/// operations as `<left op right>` with the operator shown as its span.
fn dump_expr(se: &SpanExpr) -> String {
    match &se.1 {
        Expr::Num(_) => dump_span(&se.0),
        Expr::BinOp(lhs, (op_span, _), rhs) => {
            let left = dump_expr(lhs);
            let operator = dump_span(op_span);
            let right = dump_expr(rhs);
            format!("<{} {} {}>", left, operator, right)
        }
    }
}
// Parses a sample input and prints either the parsed expression or the
// location of a failed integer conversion.
fn main() {
let i = "\n 1+2+10000- \n3";
// The binding below shadows the one above with an input containing an integer
// literal too large for i32; comment it out to see the success path.
let i = "\n 1+200000000000000000+a10000- \n3";
let pe = parse_expr_ms(Span::new(i));
println!("pe: {:?}\n", pe);
match pe {
Ok((_, (s, e))) => {
println!(
"ok, span for expression: {:?}, \n\tline: {:?}, \n\tcolumn: {:?}\n",
s,
s.line,
s.get_column()
);
println!("raw e: {:?}\n", &e);
println!("pretty e: {}\n", dump_expr(&(s, e)));
}
// Failures that carry the offending span (as produced by `parse_i32`).
Err(Err::Failure(Error(_, Some(s), err))) => {
println!(
"{:?} error at:\n\tline: {:?}\n\tcolumn: {:?}\n\tValue: {:?}\n",
err,
s.line,
s.get_column(),
s.fragment,
);
println!("raw s: {:?}", &s);
}
// Any other error: panic with its debug representation.
Err(err) => Err(err).unwrap(),
}
}
// In this example, `parse_expr_ms` is the "top" level parser.
// It consumes white spaces, allowing the location information to reflect the exact
// positions in the input file.
//
// The dump_expr will create a pretty printing of the expression with spans for
// each terminal. This will be useful for later for precise type error reporting.
//
// The extra field is not used, it can be used for metadata, such as filename.
extern crate nom;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, multispace0},
combinator::map,
error,
sequence::{preceded, tuple},
Err,
};
use nom_locate::LocatedSpan;
type Span<'a> = LocatedSpan<&'a str>;
// Custom parse error: (input position, span of the offending value if known, error kind).
#[derive(Debug)]
pub struct Error<'a>(Span<'a>, Option<Span<'a>>, ErrorKind);
type IResult<'a, I, O, E = Error<'a>> = Result<(I, O), Err<E>>;
// Lets nom's built-in combinators produce the custom `Error` type.
impl<'a> error::ParseError<Span<'a>> for Error<'a> {
// Wraps a nom error kind; no offending value span is known at this point.
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
Error(input, None, ErrorKind::Nom(kind))
}
// Keeps the existing error and discards the additional context.
fn append(_: Span<'a>, _: error::ErrorKind, other: Self) -> Self {
other
}
}
// Error categories: a failed integer conversion or an error kind from a nom combinator.
#[derive(Debug)]
enum ErrorKind {
ParseIntError(std::num::ParseIntError),
Nom(error::ErrorKind),
}
// Binary operators supported by this example.
#[derive(Debug, PartialEq)]
pub enum Op {
Add,
Sub,
}
type SpanOp<'a> = (Span<'a>, Op);
// Parses "+" or "-" and returns the operator together with the span of its symbol.
fn parse_op(i: Span) -> IResult<Span, SpanOp> {
alt((
map(tag("+"), |s| (s, Op::Add)),
map(tag("-"), |s| (s, Op::Sub)),
))(i)
}
// Expression tree with 32-bit and 64-bit integer literals; operands and the
// operator each carry their own span.
#[derive(Debug, PartialEq)]
pub enum Expr<'a> {
Num(i32),
Num64(i64),
BinOp(Box<SpanExpr<'a>>, SpanOp<'a>, Box<SpanExpr<'a>>),
}
type SpanExpr<'a> = (Span<'a>, Expr<'a>);
pub fn parse_i32<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> {
let (i, digits) = digit1(i)?;
match digits.fragment.parse() {
Ok(int) => Ok((i, (digits, Expr::Num(int)))),
Err(e) => Err(Err::Error(Error(
i,
Some(digits),
ErrorKind::ParseIntError(e),
))),
}
}
pub fn parse_i64<'a>(i: Span<'a>) -> IResult<Span<'a>, SpanExpr> {
let (i, digits) = digit1(i)?;
match digits.fragment.parse() {
Ok(int) => Ok((i, (digits, Expr::Num64(int)))),
Err(e) => Err(Err::Error(Error(
i,
Some(digits),
ErrorKind::ParseIntError(e),
))),
}
}
// Parses `<int> <op> <expr>` or a single integer; does not skip leading whitespace itself.
// Each integer is tried as i32 first and as i64 second.
// A `BinOp` is paired with `i`, the span at which this expression starts.
fn parse_expr(i: Span) -> IResult<Span, SpanExpr> {
alt((
map(
tuple((
alt((parse_i32, parse_i64)),
preceded(multispace0, parse_op),
parse_expr_ms,
)),
|(l, op, r)| (i, Expr::BinOp(Box::new(l), op, Box::new(r))),
),
// Fallbacks when no operator follows the integer.
parse_i32,
parse_i64,
))(i)
}
// Skips leading whitespace, then parses an expression.
fn parse_expr_ms(i: Span) -> IResult<Span, SpanExpr> {
preceded(multispace0, parse_expr)(i)
}
// dumps a Span into a String
/// Formats a span as `[line :<line>, col:<column>, <fragment>]`.
fn dump_span(s: &Span) -> String {
    let (line, column, text) = (s.line, s.get_column(), s.fragment);
    format!("[line :{:?}, col:{:?}, {:?}]", line, column, text)
}
// dumps a SpanExpr into a String
/// Pretty-prints a spanned expression: integer literals (of either width) as
/// their span, binary operations as `<left op right>` with the operator shown
/// as its span.
fn dump_expr(se: &SpanExpr) -> String {
    match &se.1 {
        Expr::Num(_) | Expr::Num64(_) => dump_span(&se.0),
        Expr::BinOp(lhs, (op_span, _), rhs) => {
            let left = dump_expr(lhs);
            let operator = dump_span(op_span);
            let right = dump_expr(rhs);
            format!("<{} {} {}>", left, operator, right)
        }
    }
}
// Parses a sample input and prints either the parsed expression or error details.
fn main() {
let i = "\n 1+2+10000- \n3";
// The binding below shadows the one above; its second literal does not fit in
// an i32. Comment it out to parse the first input instead.
let i = "\n 1+200000000000000000+a10000- \n3";
let pe = parse_expr_ms(Span::new(i));
println!("pe: {:?}\n", pe);
match pe {
Ok((_, (s, e))) => {
println!(
"ok, span for expression: {:?}, \n\tline: {:?}, \n\tcolumn: {:?}\n",
s,
s.line,
s.get_column()
);
println!("raw e: {:?}\n", &e);
println!("pretty e: {}\n", dump_expr(&(s, e)));
}
// NOTE(review): `parse_i32` and `parse_i64` in this file return `Err::Error`,
// not `Err::Failure`, so their conversion errors do not reach this arm.
Err(Err::Failure(Error(_, Some(s), err))) => {
println!(
"{:?} error at:\n\tline: {:?}\n\tcolumn: {:?}\n\tValue: {:?}\n",
err,
s.line,
s.get_column(),
s.fragment,
);
println!("raw s: {:?}", &s);
}
// Any other error: panic with its debug representation.
Err(err) => Err(err).unwrap(),
}
}
// In this example, `parse_expr_ms` is the "top"-level parser.
// It consumes white spaces, allowing the location information to reflect the exact
// positions in the input file.
//
// The dump_expr will create a pretty printing of the expression with spans for
// each terminal. This will be useful later for precise type error reporting.
//
// The extra field is not used, it can be used for metadata, such as filename.
extern crate nom;
use crust::parse::test;
fn main() {
test("- -1 + + 1", - -1 + 1); // rust does not allow + as a unary op (I do ;)
test("(-1-1)+(-1+3)", (-1 - 1) + (-1) + 3);
// just to check that right associative works (you don't need to implement pow)
test("2+3**2**3*5+1", 2 + 3i32.pow(2u32.pow(3)) * 5 + 1);
test("(12*2)/3-4", (12 * 2) / 3 - 4);
test("1*2+3", 1 * 2 + 3);
// just to check that we get a parse error
test("1*2+3+3*21-a12+2", 1 * 2 + 3 + 3 * 21 - 12 + 2);
}
// AST
use nom_locate::LocatedSpan;
/// Parser input type: a `&str` wrapped in `nom_locate`'s `LocatedSpan`.
pub type Span<'a> = LocatedSpan<&'a str>;
/// Operators of the expression language.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Op {
/// `==`
Eq,
/// `!=`
Neq,
/// `&&`
And,
/// `||`
Or,
/// `+`
Add,
/// `-`
Sub,
/// `*`
Mul,
/// `/`
Div,
/// `**`
Pow,
/// `!`
Not,
}
/// An operator paired with a span of the input.
type SpanOp<'a> = (Span<'a>, Op);
/// Expression tree node.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr<'a> {
/// Integer literal.
Num(i32),
/// Wrapper around a nested expression.
/// NOTE(review): presumably a parenthesised sub-expression — confirm against the parser.
Par(Box<SpanExpr<'a>>),
// Identifier
// Function application
/// Binary operation: operator, left operand, right operand.
BinOp(Op, Box<SpanExpr<'a>>, Box<SpanExpr<'a>>),
/// Unary operation: operator and its operand.
UnaryOp(Op, Box<SpanExpr<'a>>),
}
/// An expression paired with a span of the input.
pub type SpanExpr<'a> = (Span<'a>, Expr<'a>);
// lib
pub mod ast;
pub mod parse;
extern crate nom;
use std::iter::Peekable;
use std::slice::Iter;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::char,
character::complete::{digit1, multispace0},
combinator::{cut, map},
error::ParseError,
multi::many1,
sequence::{delimited, preceded},
IResult,
};
use crate::ast::{Expr, Op, Span, SpanExpr};
pub fn parse_i32(i: Span) -> IResult<Span, (Span, i32)> {
map(digit1, |digit_str: Span| {
(digit_str, digit_str.fragment.parse::<i32>().unwrap())
})(i)
}
/// Recognises one operator symbol and returns it together with the matched span.
///
/// Two-character symbols are listed before the one-character symbols they start
/// with (`**` before `*`, `!=` before `!`) so the longer symbol is tried first.
fn parse_op(i: Span) -> IResult<Span, (Span, Op)> {
    // Builds a parser that matches `text` literally and labels the match with `op`.
    fn symbol<'a>(
        text: &'static str,
        op: Op,
    ) -> impl Fn(Span<'a>) -> IResult<Span<'a>, (Span<'a>, Op)> {
        map(tag(text), move |matched| (matched, op))
    }

    alt((
        symbol("==", Op::Eq),
        symbol("!=", Op::Neq),
        symbol("**", Op::Pow),
        symbol("&&", Op::And),
        symbol("||", Op::Or),
        symbol("+", Op::Add),
        symbol("-", Op::Sub),
        symbol("*", Op::Mul),
        symbol("/", Op::Div),
        symbol("!", Op::Not),
    ))(i)
}
/// Token produced by `parse_token`; the token list is later turned into an
/// expression tree by `climb`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
/// Integer literal.
Num(i32),
/// Tokens parsed between a `(` and its closing `)`.
Par(Vec<SpanToken<'a>>),
/// Operator symbol.
Op(Op),
}
/// A token paired with a span of the input.
type SpanToken<'a> = (Span<'a>, Token<'a>);
/// Parses a terminal token: an integer literal, or a parenthesised token sequence.
fn parse_terminal(i: Span) -> IResult<Span, SpanToken> {
    let number = map(parse_i32, |(span, value)| (span, Token::Num(value)));
    let group = map(parse_par(parse_tokens), |(span, inner)| {
        (span, Token::Par(inner))
    });
    // A literal is tried first, then a parenthesised group.
    alt((number, group))(i)
}
/// Skips leading whitespace, then parses one token: an operator if possible,
/// otherwise a terminal.
fn parse_token(i: Span) -> IResult<Span, SpanToken> {
    let operator = map(parse_op, |(span, op)| (span, Token::Op(op)));
    let token = alt((operator, parse_terminal));
    preceded(multispace0, token)(i)
}
/// Parses one or more tokens and returns them together with an outer span.
///
/// NOTE(review): the outer span is the input as passed in, not the slice actually
/// consumed by the tokens — the original author suspected this is wrong.
fn parse_tokens(i: Span) -> IResult<Span, (Span, Vec<SpanToken>)> {
    let (rest, tokens) = many1(parse_token)(i)?;
    Ok((rest, (i, tokens)))
}
/// Consumes the next token and turns it into an atom of the expression tree.
///
/// * a number becomes `Expr::Num`;
/// * a parenthesised group is climbed on its own, starting from precedence 0;
/// * an operator is treated as a unary prefix applied to what follows.
///
/// # Panics
/// Panics when the token stream is exhausted.
fn compute_atom<'a>(t: &mut Peekable<Iter<SpanToken<'a>>>) -> SpanExpr<'a> {
    let (span, token) = match t.next() {
        Some(next) => next,
        None => panic!("error in compute atom"),
    };
    match token {
        Token::Num(value) => (*span, Expr::Num(*value)),
        Token::Par(inner) => climb(&mut inner.iter().peekable(), 0),
        Token::Op(op) => {
            // assume highest precedence
            let operand = climb(t, 4);
            (*span, Expr::UnaryOp(*op, Box::new(operand)))
        }
    }
}
/// Precedence climbing over a token stream.
///
/// Reads one atom, then keeps folding `op rhs` pairs into the left operand for as
/// long as the upcoming token is an operator whose precedence is at least
/// `min_prec`. The right operand is climbed with `prec + 1` for left-associative
/// operators and with `prec` otherwise.
fn climb<'a>(
    t: &mut Peekable<Iter<SpanToken<'a>>>,
    min_prec: u8,
) -> SpanExpr<'a> {
    let mut lhs: SpanExpr = compute_atom(t);
    // Stops at end of input or at the first token that is not an operator.
    while let Some((op_span, Token::Op(op))) = t.peek() {
        let (prec, ass) = get_prec(op);
        if prec < min_prec {
            break;
        }
        let rhs_min_prec = match ass {
            Ass::Left => prec + 1,
            _ => prec,
        };
        // Consume the operator that was only peeked at so far.
        t.next();
        let rhs = climb(t, rhs_min_prec);
        lhs = (*op_span, Expr::BinOp(*op, Box::new(lhs), Box::new(rhs)));
    }
    lhs
}
/// Tokenizes `s`, builds the expression tree, evaluates it and asserts that the
/// result equals `v`.
///
/// When the input is not fully consumed, or tokenizing fails, the situation is
/// printed and no assertion is made.
///
/// # Panics
/// Panics when the evaluated value differs from `v`.
pub fn test(s: &str, v: i32) {
    let parsed = parse_tokens(Span::new(s));
    match parsed {
        // Only a parse that consumed the whole input is evaluated.
        Ok((Span { fragment: "", .. }, (_, tokens))) => {
            let mut cursor = tokens.iter().peekable();
            println!("{:?}", &cursor);
            let expr = climb(&mut cursor, 0);
            println!("{:?}", &expr);
            let value = math_eval(&expr);
            println!("eval {} {}", value, v);
            assert_eq!(value, v);
        }
        Ok((rest, partial)) => println!(
            "parse incomplete, \n parsed tokens \t{:?}, \n remaining \t{:?}",
            partial, rest
        ),
        Err(err) => println!("{:?}", err),
    }
}
// helpers
/// Wraps `inner` so that it parses between `(` and `)`.
///
/// Whitespace after the opening parenthesis is skipped before `inner` runs.
fn parse_par<'a, O, F, E>(
    inner: F,
) -> impl Fn(Span<'a>) -> IResult<Span<'a>, O, E>
where
    F: Fn(Span<'a>) -> IResult<Span<'a>, O, E>,
    E: ParseError<Span<'a>>,
{
    let open = char('(');
    // `cut` is applied to the closing parenthesis to prevent backtracking past it.
    let close = cut(char(')'));
    // `delimited` keeps only the output of the middle parser.
    delimited(open, preceded(multispace0, inner), close)
}
/// Evaluates an arithmetic expression tree to an `i32`.
///
/// The tree is walked by reference; no node is cloned. Spans are ignored.
///
/// # Panics
/// * `unimplemented!()` for operators without arithmetic meaning here
///   (anything but `+ - * / **` as binary, `+ -` as unary);
/// * division by zero, and arithmetic overflow in builds with overflow checks.
fn math_eval(e: &SpanExpr) -> i32 {
    match &e.1 {
        Expr::Num(i) => *i,
        // A parenthesised expression has the value of its content.
        Expr::Par(inner) => math_eval(inner),
        Expr::BinOp(op, l, r) => {
            let lv = math_eval(l);
            let rv = math_eval(r);
            match op {
                Op::Add => lv + rv,
                Op::Sub => lv - rv,
                Op::Mul => lv * rv,
                Op::Div => lv / rv,
                // NOTE(review): a negative exponent wraps to a huge `u32` in this cast.
                Op::Pow => lv.pow(rv as u32),
                _ => unimplemented!(),
            }
        }
        Expr::UnaryOp(op, operand) => {
            let value = math_eval(operand);
            match op {
                Op::Add => value,
                Op::Sub => -value,
                _ => unimplemented!(),
            }
        }
    }
}
/// Operator associativity, as reported by `get_prec`.
#[derive(Debug, Copy, Clone, PartialEq)]
enum Ass {
/// Left-associative.
Left,
/// Right-associative.
Right,
}
/// Returns the precedence level and associativity of a binary operator.
///
/// Higher numbers bind tighter: `+ -` are level 1, `* /` level 2 (all
/// left-associative) and `**` is level 3, right-associative.
///
/// # Panics
/// `unimplemented!()` for every other operator.
fn get_prec(op: &Op) -> (u8, Ass) {
    match *op {
        Op::Add | Op::Sub => (1, Ass::Left),
        Op::Mul | Op::Div => (2, Ass::Left),
        Op::Pow => (3, Ass::Right),
        _ => unimplemented!(),
    }
}