diff --git a/Cimp.ml b/Cimp.ml index 519f45a8f3db999a5518a6594853f69a87ec7dca..503bc62cf4e4022858891ef599ffb680c51b736a 100644 --- a/Cimp.ml +++ b/Cimp.ml @@ -15,6 +15,8 @@ module Vm_Ex = Vm_ex__Vm_Ex module Imp_Ex = Imp_ex__Imp_Ex module State = State__State +module Imp = Imp__Imp + let () = cmd; (* parse command line options and put into opt *) @@ -32,13 +34,17 @@ let () = let lexbuf = Lexing.from_channel inBuffer in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = opt.infile }; try - let prog = Parser.prog Lexer.lex lexbuf in + let p = Parser.prog Lexer.lex lexbuf in + (* p_stdout ("Decl:" ^ nl ^ T_Dump.of_prog p); *) + + let com = T_Check.tc_prog inBuffer p in + if opt.d_ast then - p_stderr ("Raw AST : \n" ^ of_com prog ^ nl); + p_stderr ("Raw AST:" ^ nl ^ Dump.of_com com ^ nl); if opt.d_past then - p_stderr ("Pretty AST: \n" ^ pretty_of_com 0 prog ^ nl); + p_stderr ("Pretty AST:" ^ nl ^ Dump.pretty_of_com 0 com ^ nl); - let code = Compile.compile_program prog in + let code = Compile.compile_program com in if opt.d_code then p_stderr ("Raw Code : \n" ^ of_code false code ^ nl); if opt.d_pcode then @@ -50,7 +56,7 @@ let () = if opt.imp_ex then ( try p_stdout ("Execute : imp_ex" ); - let st_end = Imp_Ex.ceval_ex st_0 prog in + let st_end = Imp_Ex.ceval_ex st_0 com in p_stdout ("ceval_ex" ^ nl ^ Env.to_string st_end ^ nl); with | _ -> p_stdout "ceval : Exited with an error\n"; @@ -67,13 +73,15 @@ let () = p_stdout ("execution halted"); p_stdout ("instr_iter_ex" ^ nl ^ Env.to_string st_halt ^ nl); () - ); + ); p_stdout ("Done!"); with | Lexer.SyntaxError msg -> raise (CompilerError ("Syntax error. " ^ msg ^ parse_err_msg lexbuf)); - | Parser.Error -> raise (CompilerError ("Parser error." ^ parse_err_msg lexbuf)); + | Parser.Error -> + raise (CompilerError ("Parser error." ^ parse_err_msg lexbuf)); + (* raise (CompilerError ("Parser error.")); *) with | CompilerError msg -> p_stderr msg; | Failure msg -> p_stderr ("Failure (internal error): " ^ msg); diff --git a/Common.ml b/Common.ml index 0a3908f302548ab0f23d2a5c2f97549bdd63a3b3..f95a37bdcae029d045974b5ee2fd83424c90825d 100644 --- a/Common.ml +++ b/Common.ml @@ -20,6 +20,7 @@ let ecit = "\'" (* Error handling *) +exception TypeError of string exception CompilerError of string exception UnMatched diff --git a/Lexer.mll b/Lexer.mll index 3f4818093e788e4794ea2f2c52331549e054cfbb..9811bf7f6ad2a9ea7eeb8319571aa85ef3b6d5e7 100644 --- a/Lexer.mll +++ b/Lexer.mll @@ -26,6 +26,7 @@ let digits = ['0'-'9']+ (* lexing rules *) rule lex = parse + | "IF" { IF } | "THEN" { THEN } | "ELSE" { ELSE } @@ -41,8 +42,13 @@ rule lex = parse | "=" { BEQ } | "<=" { BLE } + | "SINT" { SINT } + | "UINT32" { UINT32 } + | ';' { SC } + | ':' { C } | ":=" { ASSIGN } + | '+' { PLUS } | "+u" { PLUSU } | '-' { MINUS } @@ -56,6 +62,7 @@ rule lex = parse | "(*" { set_info lexbuf; comments 0 lexbuf } (* nested comment *) | '(' { LP } (* must come after comment *) | ')' { RP } + | eof { EOF } | _ { raise (SyntaxError("Unknown Symbol.")) } diff --git a/Parser.mly b/Parser.mly index ddc40db915642c34b47bf839fc1e3a87fb4bc1e5..af6f5ac1391500dde6302e177658c656ea0a180f 100644 --- a/Parser.mly +++ b/Parser.mly @@ -7,11 +7,21 @@ %token <int> INTVAL %token IF THEN ELSE END WHILE DO DONE %token TRUE FALSE AND NOT BEQ BLE -%token SC LP RP ASSIGN PLUS PLUSU MINUS +%token SINT UINT32 + +%token SC C LP RP +%token ASSIGN +%token PLUS PLUSU MINUS %token EOF +(* precedence and associativity according to C/Java/Rust? *) +%left SC +%left AND +%left MINUS, PLUS, PLUSU +%left NOT + %{ - open Imp__Imp + open T_Imp open Common open Env open State__State @@ -19,34 +29,51 @@ %start prog -%type <Imp__Imp.com> prog +%type <T_Imp.prog> prog %% - prog: - | com EOF { $1 } + | decl_span SC com_span EOF { Prog ($1, $3) } +decl_span: + | decl { ($1, ($startofs, $endofs)) } +decl: + | decl_span SC decl_span { Dseq ($1, $3) } + | ID C primtype { Ddecl ($1, $3) } + +primtype: + | SINT { Tsint } + | UINT32 { Tuint32 } + +com_span: + | com { ($1, ($startofs, $endofs)) } com: - | com SC com { Cseq ($1, $3) } - | ID ASSIGN aexpr { Cassign ($1, $3) } - | IF bexpr THEN com ELSE com END { Cif ($2, $4, $6) } - | IF bexpr THEN com END { Cif ($2, $4, Cskip) } - | WHILE bexpr DO com DONE { Cwhile ($2, $4) } + | com_span SC com_span { Cseq ($1, $3) } + | ID ASSIGN aexpr_span { Cassign ($1, $3) } + | IF bexpr_span THEN com_span + ELSE com_span END { Cif ($2, $4, $6) } + | IF bexpr_span THEN com_span END { Cif ($2, $4, (Cskip, (0, 0))) } + | WHILE bexpr_span DO com_span DONE { Cwhile ($2, $4) } +bexpr_span: + | bexpr { ($1, ($startofs, $endofs)) } bexpr: - | LP bexpr RP { $2 } - | TRUE { Btrue } - | FALSE { Bfalse } - | bexpr AND bexpr { Band ($1, $3) } - | NOT bexpr { Bnot ($2) } - | aexpr BEQ aexpr { Beq ($1, $3) } - | aexpr BLE aexpr { Ble ($1, $3) } + | LP bexpr RP { $2 } + | TRUE { Btrue } + | FALSE { Bfalse } + | bexpr_span AND bexpr_span { Band ($1, $3) } + | NOT bexpr_span { Bnot ($2) } + | aexpr_span BEQ aexpr_span { Beq ($1, $3) } + | aexpr_span BLE aexpr_span { Ble ($1, $3) } + +aexpr_span: + | aexpr { ($1, ($startofs, $endofs)) } aexpr: - | LP aexpr RP { $2 } - | INTVAL { Anum (Z.of_int $1) } - | ID { Avar $1 } - | aexpr PLUS aexpr { Aadd ($1, $3) } - | aexpr PLUSU aexpr { Aaddu ($1, $3) } - | aexpr MINUS aexpr { Asub ($1, $3) } + | LP aexpr RP { $2 } + | INTVAL { Anum (Z.of_int $1) } + | ID { Avar $1 } + | aexpr_span PLUS aexpr_span { Aadd ($1, $3) } + | aexpr_span PLUSU aexpr_span { Aaddu ($1, $3) } + | aexpr_span MINUS aexpr_span { Asub ($1, $3) } diff --git a/T_Check.ml b/T_Check.ml new file mode 100644 index 0000000000000000000000000000000000000000..3a2d3604f32c33db48e89d429d6c6814f7bab956 --- /dev/null +++ b/T_Check.ml @@ -0,0 +1,135 @@ +(* Copyright Per Lindgren 2016-2018, see the file "LICENSE" *) +(* for the full license governing this code. *) + +(* type checker *) +open T_Imp +open T_Dump +open Common +open State__State + + +module Imp = Imp__Imp + +let of_span inb (start, stop) = + let _ = seek_in inb start in + let s = really_input_string inb (stop - start) in + "<" ^ string_of_int start ^ ".." ^ string_of_int stop ^ "> " ^ s + +let unique_id chan (id1, (t1, s1)) (id2, (t2, s2)) = + if id1 = id2 then + raise (CompilerError("Dupclicate variable definition: " ^ + of_span chan s1 ^ " already declared at " ^ of_span chan s2)) + else () + +let rec idt_acc ch sp acc = function + | Dseq (d1, d2) -> (idt_acc_span ch (idt_acc_span ch acc d1) d2) + | Ddecl (id, t) -> List.iter (unique_id ch (id, (t, sp))) acc; + (id, (t, sp)) :: acc + +and idt_acc_span c acc (d, s) = idt_acc c s acc d + +let of_idt ch (id, (t,s)) = + of_id id ^ ":" ^ of_types t ^ of_span ch s + + +let tc_unify ch t1 s1 t2 s2 : types = + match t1, t2 with + | Tsint, Tsint -> Tsint + | Tuint32, Tuint32 -> Tuint32 + | Tint, t -> t + | t, Tint -> t + | _, _ -> + raise (TypeError( + "Type error: " ^ + of_span ch s1 ^ " : " ^ of_types t1 ^ ", does not match " ^ + of_span ch s2 ^ " : " ^ of_types t2 )) + +let get_id_type itl (id : id) : types * span = + try + List.assoc id itl + with + _ -> raise (TypeError("Undeclared identifier: " ^ Dump.pretty_of_id id)) + +let rec tc_aexpr ch itl (a, span) : Imp.aexpr * types = + (* try *) + match a with + | Anum n -> (Imp.Anum n, Tint) + | Avar id -> + let (t, _ ) = get_id_type itl id in + (Imp.Avar id, t) + | Aadd ((a1, a1_span), (a2, a2_span)) -> + + (* type check a1 against Tsint *) + let (ai1, t1) = tc_aexpr ch itl (a1, a1_span) in + let _ = tc_unify ch Tsint (0, 0) t1 a1_span in + + (* type check a2 against Tsint *) + let (ai2, t2) = tc_aexpr ch itl (a2, a2_span) in + let _ = tc_unify ch Tsint (0, 0) t2 a2_span in + + (Imp.Aadd(ai1, ai2), t1) + | Aaddu ((a1, a1_span), (a2, a2_span)) -> + + (* type check a1 against Tuint32 *) + let (ai1, t1) = tc_aexpr ch itl (a1, a1_span) in + let _ = tc_unify ch Tuint32 (0, 0) t1 a1_span in + + (* type check a2 against Tuint32 *) + let (ai2, t2) = tc_aexpr ch itl (a2, a2_span) in + let _ = tc_unify ch Tuint32 (0, 0) t2 a2_span in + + (Imp.Aaddu(ai1, ai2), t1) + + | Asub ((a1, a1_span), (a2, a2_span)) -> + + (* type check a1 against Tsint *) + let (ai1, t1) = tc_aexpr ch itl (a1, a1_span) in + let _ = tc_unify ch Tsint (0, 0) t1 a1_span in + + (* type check a2 against Tsint *) + let (ai2, t2) = tc_aexpr ch itl (a2, a2_span) in + let _ = tc_unify ch Tsint (0, 0) t2 a2_span in + + (Imp.Asub(ai1, ai2), t1) +(* with + | TypeError msg -> raise (TypeError (msg ^ nl ^ "in expression:" ^ of_span ch span )) *) + +let rec tc_bexpr ch itl (b, span) = + try + match b with + | Btrue -> Imp.Btrue + | Bfalse -> Imp.Bfalse + | Band (b1, b2) -> Imp.Band(tc_bexpr ch itl b1,tc_bexpr ch itl b2) + | Bnot b -> Imp.Bnot(tc_bexpr ch itl b) + | Beq ((a1, a1_span),(a2, a2_span)) -> + let (a1, t1) = tc_aexpr ch itl (a1, a1_span) in + let (a2, t2) = tc_aexpr ch itl (a2, a2_span) in + let _ = tc_unify ch t1 a1_span t2 a2_span in + Imp.Beq(a1, a2) + | Ble (a1, a2) -> Imp.Ble(imp_of_aexpr_span a1, imp_of_aexpr_span a2) + with + | TypeError msg -> raise (TypeError (msg ^ nl ^ "in expression:" ^ of_span ch span )) + + +let rec tc_com ch itl span com = + try + match com with + | Cseq (c1, c2) -> Imp.Cseq(tc_com_span ch itl c1, tc_com_span ch itl c2) + | Cassign (id, a) -> + let (_, a_span) = a in + let (a, ta) = tc_aexpr ch itl a in + let (tid, tid_span) = get_id_type itl id in + let _ = tc_unify ch ta a_span tid tid_span in + Imp.Cassign (id, a) + | Cif (b, c1, c2) -> Imp.Cif(tc_bexpr ch itl b, tc_com_span ch itl c1, tc_com_span ch itl c2) + | Cwhile (b, c) -> Imp.Cwhile(tc_bexpr ch itl b, tc_com_span ch itl c) + | Cskip -> Imp.Cskip + with + | TypeError msg -> raise (CompilerError (msg ^ nl ^ "in command: " ^ of_span ch span )) +and + tc_com_span ch itl (com, span) = tc_com ch itl span com + +let tc_prog ch (Prog (decl, com)) = + let itl = idt_acc_span ch [] decl in + tc_com_span ch itl com + diff --git a/T_Dump.ml b/T_Dump.ml new file mode 100644 index 0000000000000000000000000000000000000000..b6bab2306262860aff740690bac62602a0840ce4 --- /dev/null +++ b/T_Dump.ml @@ -0,0 +1,56 @@ +(* Copyright Per Lindgren 2016-2018, see the file "LICENSE" *) +(* for the full license governing this code. *) + +open State__State +open Vm__Vm +open Env +open Common +open T_Imp + +(* AST dump of T_Imp *) + +let of_types = function + | Tsint -> "SINT" + | Tuint32 -> "UINT32" + | Tint -> "INT" + +let of_id = function + | Id i -> "Id #" ^ Z.to_string i + +let rec of_aexpr = function + | Anum v -> "Anum " ^ Z.to_string v + | Avar id -> "Avar " ^ of_id id + | Aadd (e1, e2) -> "Aadd (" ^ of_aexpr_span e1 ^ ") (" ^ of_aexpr_span e2 ^ ")" + | Aaddu (e1, e2) -> "Aaddu (" ^ of_aexpr_span e1 ^ ") (" ^ of_aexpr_span e2 ^")" + | Asub (e1, e2) -> "Asub (" ^ of_aexpr_span e1 ^ ") (" ^ of_aexpr_span e2 ^")" +and of_aexpr_span (e, (start, stop)) = + "< (" ^ string_of_int start ^ ", " ^ string_of_int stop ^ + ") " ^ of_aexpr e ^ ">" + +let rec of_bexpr = function + | Btrue -> "Btrue" + | Bfalse -> "Bfalse" + | Band (b1, b2) -> "Band (" ^ of_bexpr_span b1 ^ ") (" ^ of_bexpr_span b2 ^ ")" + | Bnot b -> "Bnot (" ^ of_bexpr_span b ^ ")" + | Beq (e1, e2) -> "Beq (" ^ of_aexpr_span e1 ^ ") (" ^ of_aexpr_span e2 ^ ")" + | Ble (e1, e2) -> "Ble (" ^ of_aexpr_span e1 ^ ") (" ^ of_aexpr_span e2 ^ ")" +and of_bexpr_span (e, s) = of_bexpr e + +let rec of_com = function + | Cskip -> "Cskip" + | Cassign (id, a) -> "Cassign " ^ of_id id ^ " " ^ of_aexpr_span a + | Cseq (c1, c2) -> "Cseq (" ^ of_com_span c1 ^ ") (" ^ of_com_span c2 ^ ")" + | Cif (b, c1, c2) -> "Cif (" ^ of_bexpr_span b ^ ") (" ^ of_com_span c1 ^ " " ^ of_com_span c2 ^ ")" + | Cwhile (b, c) -> "Cwhile (" ^ of_bexpr_span b ^ ") (" ^ of_com_span c ^ ")" +and of_com_span (c, (start, stop)) = + "< (" ^ string_of_int start ^ ", " ^ string_of_int stop ^ + ") " ^ of_com c ^ ">" + +let rec of_decl = function + | Dseq (d1, d2) -> "Dseq (" ^ of_decl_span d1 ^ ", " ^ of_decl_span d2 ^ ")" + | Ddecl (id, t) -> "Ddecl (" ^ of_id id ^ ", " ^ of_types t ^ ")" +and of_decl_span (d,_) = of_decl d + +let of_prog = function + | Prog (d, c) -> of_decl_span d ^ of_com_span c + diff --git a/T_Imp.ml b/T_Imp.ml new file mode 100644 index 0000000000000000000000000000000000000000..e99d71a021f6653bb75aeb4994f5686535e6a1a2 --- /dev/null +++ b/T_Imp.ml @@ -0,0 +1,76 @@ + +module Imp = Imp__Imp + +type types = + | Tsint + | Tuint32 + | Tint (* int litterals can be coersed to both Tsint and Tuint32 *) + +type span = int * int + +type aexpr = + | Anum of (Z.t) + | Avar of State__State.id + | Aadd of aexpr_span * aexpr_span + | Aaddu of aexpr_span * aexpr_span + | Asub of aexpr_span * aexpr_span +and + aexpr_span = aexpr * span + +type bexpr = + | Btrue + | Bfalse + | Band of bexpr_span * bexpr_span + | Bnot of bexpr_span + | Beq of aexpr_span * aexpr_span + | Ble of aexpr_span * aexpr_span +and bexpr_span = bexpr * span + +type com = + | Cskip + | Cseq of com_span * com_span + | Cassign of State__State.id * aexpr_span + | Cif of bexpr_span * com_span * com_span + | Cwhile of bexpr_span * com_span +and com_span = com * span + +type decl = + | Dseq of decl_span * decl_span + | Ddecl of State__State.id * types +and decl_span = decl * span + +type prog = Prog of decl_span * com_span + +(* Convert to Imp *) +let rec imp_of_aexpr = function + | Anum n -> Imp.Anum n + | Avar id -> Imp.Avar id + | Aadd (a1, a2) -> Imp.Aadd(imp_of_aexpr_span a1, imp_of_aexpr_span a2) + | Aaddu (a1, a2) -> Imp.Aaddu(imp_of_aexpr_span a1, imp_of_aexpr_span a2) + | Asub (a1, a2) -> Imp.Asub(imp_of_aexpr_span a1, imp_of_aexpr_span a2) +and + imp_of_aexpr_span (a, _) = imp_of_aexpr a + +let rec imp_of_bexpr = function + | Btrue -> Imp.Btrue + | Bfalse -> Imp.Bfalse + | Band (b1, b2) -> Imp.Band(imp_of_bexpr_span b1,imp_of_bexpr_span b2) + | Bnot b -> Imp.Bnot(imp_of_bexpr_span b) + | Beq (a1, a2) -> Imp.Beq(imp_of_aexpr_span a1, imp_of_aexpr_span a2) + | Ble (a1, a2) -> Imp.Ble(imp_of_aexpr_span a1, imp_of_aexpr_span a2) +and + imp_of_bexpr_span (b, _) = imp_of_bexpr b + +let rec imp_of_com = function + | Cskip -> Imp.Cskip + | Cseq (c1, c2) -> Imp.Cseq(imp_of_com_span c1,imp_of_com_span c2) + | Cassign (id, a) -> Imp.Cassign (id, imp_of_aexpr_span a) + | Cif (b, c1, c2) -> Imp.Cif(imp_of_bexpr_span b, imp_of_com_span c1, imp_of_com_span c2) + | Cwhile (b, c) -> Imp.Cwhile(imp_of_bexpr_span b, imp_of_com_span c) +and + imp_of_com_span (c, _) = imp_of_com c + + + + +