From 8c4e248cd89c0054a02ff7b7440edb63f66e23e0 Mon Sep 17 00:00:00 2001 From: rossberg-chromium Date: Wed, 27 Apr 2016 15:19:15 +0200 Subject: [PATCH 1/2] Allow binary modules in text format --- ml-proto/README.md | 10 +++++--- ml-proto/given/lib.ml | 9 +++++++ ml-proto/given/lib.mli | 5 ++++ ml-proto/host/format.ml | 33 ++++++++++++++++-------- ml-proto/host/parse.ml | 2 +- ml-proto/host/parse.mli | 4 +-- ml-proto/host/parser.mly | 18 ++++++++++--- ml-proto/host/run.ml | 12 ++++++++- ml-proto/host/script.ml | 54 +++++++++++++++++++++++++++++++-------- ml-proto/host/script.mli | 12 ++++++--- ml-proto/spec/decode.mli | 2 +- ml-proto/test/binary.wast | 14 ++++++++++ 12 files changed, 138 insertions(+), 37 deletions(-) create mode 100644 ml-proto/test/binary.wast diff --git a/ml-proto/README.md b/ml-proto/README.md index 40898953fc..b426ebf63c 100644 --- a/ml-proto/README.md +++ b/ml-proto/README.md @@ -147,21 +147,23 @@ param: ( param * ) | ( param ) result: ( result ) local: ( local * ) | ( local ) -module: ( module * * * * * ? ? ) +module: ( module * * * *
* ? ? ) | (module +) type: ( type ? ( func * ? ) ) import: ( import ? (param * ) (result )* ) export: ( export ) | ( export memory) start: ( start ) table: ( table * ) memory: ( memory ? * ) -segment: ( segment ) +segment: ( segment + ) ``` Here, productions marked with respective comments are abbreviation forms for equivalent expansions (see the explanation of the kernel AST below). Any form of naming via `` and `` (including expression labels) is merely notational convenience of this text format. The actual AST has no names, and all bindings are referred to via ordered numeric indices; consequently, names are immediately resolved in the parser and replaced by indices. Indices can also be used directly in the text format. -The segment string in the memory field is used to initialize the memory at the given offset. +A module of the form `(module +)` is given in binary form and will be decoded from the (concatenation of the) strings. + +The segment strings in the memory field are used to initialize the consecutive memory at the given offset. Comments can be written in one of two ways: @@ -189,7 +191,7 @@ cmd: ( assert_trap (invoke * ) ) ;; assert invocation traps with given failure string ( assert_invalid ) ;; assert invalid module with given failure string ( input ) ;; read script or module from file - ( output ) ;; output module to file + ( output ? ) ;; output module to stout or file ``` Commands are executed in sequence. Invocation, assertions, and output apply to the most recently defined module (the _current_ module), and are only possible after a module has been defined. Note that there only ever is one current module, the different module definitions cannot interact. diff --git a/ml-proto/given/lib.ml b/ml-proto/given/lib.ml index a7408a312f..789fe856ca 100644 --- a/ml-proto/given/lib.ml +++ b/ml-proto/given/lib.ml @@ -64,3 +64,12 @@ struct if x < 0L then failwith "is_power_of_two"; x <> 0L && (Int64.logand x (Int64.sub x 1L)) = 0L end + +module String = +struct + let breakup s n = + let rec loop i = + let len = min n (String.length s - i) in + if len = 0 then [] else String.sub s i len :: loop (i + len) + in loop 0 +end diff --git a/ml-proto/given/lib.mli b/ml-proto/given/lib.mli index cef55599b4..97b54f23e5 100644 --- a/ml-proto/given/lib.mli +++ b/ml-proto/given/lib.mli @@ -28,3 +28,8 @@ module Int64 : sig val is_power_of_two : int64 -> bool end + +module String : +sig + val breakup : string -> int -> string list +end diff --git a/ml-proto/host/format.ml b/ml-proto/host/format.ml index 1c0072594f..ffce4cecd2 100644 --- a/ml-proto/host/format.ml +++ b/ml-proto/host/format.ml @@ -10,7 +10,21 @@ open Sexpr let int = string_of_int let int32 = Int32.to_string let int64 = Int64.to_string -let string s = "\"" ^ String.escaped s ^ "\"" + +let string s = + let buf = Buffer.create (String.length s + 2) in + Buffer.add_char buf '\"'; + for i = 0 to String.length s - 1 do + let c = s.[i] in + if c = '\"' then + Buffer.add_string buf "\\\"" + else if '\x20' <= c && c < '\x7f' then + Buffer.add_char buf c + else + Buffer.add_string buf (Printf.sprintf "\\%02x" (Char.code c)); + done; + Buffer.add_char buf '\"'; + Buffer.contents buf let list_of_opt = function None -> [] | Some x -> [x] @@ -221,12 +235,10 @@ and block e = (* Functions *) -let func m f = +let func f = let {ftype; locals; body} = f.it in - let {ins; out} = List.nth m.it.types ftype.it in Node ("func", - decls "param" ins @ - decls "result" (list_of_opt out) @ + [Node ("type " ^ var ftype, [])] @ decls "local" locals @ block body ) @@ -240,7 +252,8 @@ let table xs = tab "table" (atom var) xs let segment seg = let {Memory.addr; data} = seg.it in - Node ("segment " ^ int64 addr, [atom string data]) + let ss = Lib.String.breakup data (!Flags.width / 2) in + Node ("segment " ^ int64 addr, list (atom string) ss) let memory mem = let {min; max; segments} = mem.it in @@ -269,10 +282,10 @@ let module_ m = Node ("module", list typedef m.it.types @ list import m.it.imports @ - list export m.it.exports @ - list (func m) m.it.funcs @ - opt start m.it.start @ + list func m.it.funcs @ table m.it.table @ - opt memory m.it.memory + opt memory m.it.memory @ + list export m.it.exports @ + opt start m.it.start ) diff --git a/ml-proto/host/parse.ml b/ml-proto/host/parse.ml index 09f3f3f3b5..f0be33701c 100644 --- a/ml-proto/host/parse.ml +++ b/ml-proto/host/parse.ml @@ -1,5 +1,5 @@ type 'a start = - | Module : Ast.module_ start + | Module : Script.definition start | Script : Script.script start | Script1 : Script.script start diff --git a/ml-proto/host/parse.mli b/ml-proto/host/parse.mli index afba014fa9..46e7e67c63 100644 --- a/ml-proto/host/parse.mli +++ b/ml-proto/host/parse.mli @@ -1,5 +1,5 @@ type 'a start = - | Module : Ast.module_ start + | Module : Script.definition start | Script : Script.script start | Script1 : Script.script start @@ -8,4 +8,4 @@ exception Syntax of Source.region * string val parse : string -> Lexing.lexbuf -> 'a start -> 'a (* raise Syntax *) val string_to_script : string -> Script.script (* raise Syntax *) -val string_to_module : string -> Ast.module_ (* raise Syntax *) +val string_to_module : string -> Script.definition (* raise Syntax *) diff --git a/ml-proto/host/parser.mly b/ml-proto/host/parser.mly index e72a360693..c0b6d5f15e 100644 --- a/ml-proto/host/parser.mly +++ b/ml-proto/host/parser.mly @@ -158,10 +158,17 @@ let implicit_decl c t at = %start script script1 module1 %type script %type script1 -%type module1 +%type module1 %% +/* Auxiliaries */ + +text_list : + | TEXT { $1 } + | text_list TEXT { $1 ^ $2 } +; + /* Types */ value_type_list : @@ -328,7 +335,7 @@ start : { fun c -> $3 c func } segment : - | LPAR SEGMENT INT TEXT RPAR + | LPAR SEGMENT INT text_list RPAR { {Memory.addr = Int64.of_string $3; Memory.data = $4} @@ at () } ; segment_list : @@ -412,7 +419,9 @@ module_fields : {m with start = Some ($1 c)} } ; module_ : - | LPAR MODULE module_fields RPAR { $3 (empty_context ()) @@ at () } + | LPAR MODULE module_fields RPAR + { Textual ($3 (empty_context ()) @@ at ()) @@ at() } + | LPAR MODULE text_list RPAR { Binary $3 @@ at() } ; @@ -429,7 +438,8 @@ cmd : | LPAR ASSERT_TRAP LPAR INVOKE TEXT const_list RPAR TEXT RPAR { AssertTrap ($5, $6, $8) @@ at () } | LPAR INPUT TEXT RPAR { Input $3 @@ at () } - | LPAR OUTPUT TEXT RPAR { Output $3 @@ at () } + | LPAR OUTPUT TEXT RPAR { Output (Some $3) @@ at () } + | LPAR OUTPUT RPAR { Output None @@ at () } ; cmd_list : | /* empty */ { [] } diff --git a/ml-proto/host/run.ml b/ml-proto/host/run.ml index c3410bb856..d545e7d5ac 100644 --- a/ml-proto/host/run.ml +++ b/ml-proto/host/run.ml @@ -42,7 +42,9 @@ let run_sexpr name lexbuf start = let run_binary name buf = let open Source in run_from - (fun _ -> let m = Decode.decode name buf in [Script.Define m @@ m.at]) + (fun _ -> + let m = Decode.decode name buf in + [Script.Define (Script.Textual m @@ m.at) @@ m.at]) let run_sexpr_file file = Script.trace ("Loading (" ^ file ^ ")..."); @@ -113,6 +115,12 @@ let rec run_stdin () = (* Output *) +let print_stdout m = + Script.trace "Formatting..."; + let sexpr = Format.module_ (Desugar.desugar m) in + Script.trace "Printing..."; + Sexpr.output stdout !Flags.width sexpr + let create_sexpr_file file m = Script.trace ("Formatting (" ^ file ^ ")..."); let sexpr = Format.module_ (Desugar.desugar m) in @@ -134,4 +142,6 @@ let create_binary_file file m = with exn -> close_out oc; raise exn let create_file = dispatch_file_ext create_sexpr_file create_binary_file + let () = Script.output_file := create_file +let () = Script.output_stdout := print_stdout diff --git a/ml-proto/host/script.ml b/ml-proto/host/script.ml index 5327b7bdd8..81712f8a1e 100644 --- a/ml-proto/host/script.ml +++ b/ml-proto/host/script.ml @@ -3,16 +3,21 @@ open Source (* Script representation *) +type definition = definition' Source.phrase +and definition' = + | Textual of Ast.module_ + | Binary of string + type command = command' Source.phrase and command' = - | Define of Ast.module_ + | Define of definition | Invoke of string * Kernel.literal list - | AssertInvalid of Ast.module_ * string + | AssertInvalid of definition * string | AssertReturn of string * Kernel.literal list * Kernel.literal option | AssertReturnNaN of string * Kernel.literal list | AssertTrap of string * Kernel.literal list * string | Input of string - | Output of string + | Output of string option type script = command list @@ -44,10 +49,19 @@ let get_instance at = match !current_instance with let input_file = ref (fun _ -> assert false) let output_file = ref (fun _ -> assert false) +let output_stdout = ref (fun _ -> assert false) + +let run_def def = + match def.it with + | Textual m -> m + | Binary bs -> + trace "Decoding..."; + Decode.decode "binary" bs let run_cmd cmd = match cmd.it with - | Define m -> + | Define def -> + let m = run_def def in let m' = Desugar.desugar m in trace "Checking..."; Check.check_module m'; @@ -66,11 +80,14 @@ let run_cmd cmd = let v = Eval.invoke m name (List.map it es) in if v <> None then Print.print_value v - | AssertInvalid (m, re) -> + | AssertInvalid (def, re) -> trace "Asserting invalid..."; - let m' = Desugar.desugar m in - (match Check.check_module m' with - | exception Check.Invalid (_, msg) -> + (match + let m = run_def def in + let m' = Desugar.desugar m in + Check.check_module m' + with + | exception (Decode.Code (_, msg) | Check.Invalid (_, msg)) -> if not (Str.string_match (Str.regexp re) msg 0) then begin print_endline ("Result: \"" ^ msg ^ "\""); print_endline ("Expect: \"" ^ re ^ "\""); @@ -126,13 +143,25 @@ let run_cmd cmd = (try if not (!input_file file) then Abort.error cmd.at "aborting" with Sys_error msg -> IO.error cmd.at msg) - | Output file -> + | Output (Some file) -> (try !output_file file (get_module cmd.at) with Sys_error msg -> IO.error cmd.at msg) + | Output None -> + (try !output_stdout (get_module cmd.at) + with Sys_error msg -> IO.error cmd.at msg) + +let dry_def def = + match def.it with + | Textual m -> m + | Binary bs -> + trace "Decoding..."; + Decode.decode "binary" bs + let dry_cmd cmd = match cmd.it with - | Define m -> + | Define def -> + let m = dry_def def in let m' = Desugar.desugar m in trace "Checking..."; Check.check_module m'; @@ -144,9 +173,12 @@ let dry_cmd cmd = | Input file -> (try if not (!input_file file) then Abort.error cmd.at "aborting" with Sys_error msg -> IO.error cmd.at msg) - | Output file -> + | Output (Some file) -> (try !output_file file (get_module cmd.at) with Sys_error msg -> IO.error cmd.at msg) + | Output None -> + (try !output_stdout (get_module cmd.at) + with Sys_error msg -> IO.error cmd.at msg) | Invoke _ | AssertInvalid _ | AssertReturn _ diff --git a/ml-proto/host/script.mli b/ml-proto/host/script.mli index 4392f3b61e..a745aa162b 100644 --- a/ml-proto/host/script.mli +++ b/ml-proto/host/script.mli @@ -1,13 +1,18 @@ +type definition = definition' Source.phrase +and definition' = + | Textual of Ast.module_ + | Binary of string + type command = command' Source.phrase and command' = - | Define of Ast.module_ + | Define of definition | Invoke of string * Kernel.literal list - | AssertInvalid of Ast.module_ * string + | AssertInvalid of definition * string | AssertReturn of string * Kernel.literal list * Kernel.literal option | AssertReturnNaN of string * Kernel.literal list | AssertTrap of string * Kernel.literal list * string | Input of string - | Output of string + | Output of string option type script = command list @@ -23,3 +28,4 @@ val trace : string -> unit val input_file : (string -> bool) ref val output_file : (string -> Ast.module_ -> unit) ref +val output_stdout : (Ast.module_ -> unit) ref diff --git a/ml-proto/spec/decode.mli b/ml-proto/spec/decode.mli index fc7a453a96..30a731b90b 100644 --- a/ml-proto/spec/decode.mli +++ b/ml-proto/spec/decode.mli @@ -1,3 +1,3 @@ exception Code of Source.region * string -val decode : string -> bytes -> Ast.module_ (* raise Code *) +val decode : string -> string -> Ast.module_ (* raise Code *) diff --git a/ml-proto/test/binary.wast b/ml-proto/test/binary.wast new file mode 100644 index 0000000000..f9c42f1d81 --- /dev/null +++ b/ml-proto/test/binary.wast @@ -0,0 +1,14 @@ +(module "\00asm\0b\00\00\00") +(module "\00asm" "\0b\00\00\00") + +(assert_invalid (module "") "unexpected end") +(assert_invalid (module "\01") "unexpected end") +(assert_invalid (module "\00as") "unexpected end") +(assert_invalid (module "\01") "unexpected end") +(assert_invalid (module "asm\00") "magic header not detected") + +(assert_invalid (module "\00asm") "unexpected end") +(assert_invalid (module "\00asm\0b") "unexpected end") +(assert_invalid (module "\00asm\0b\00\00") "unexpected end") +(assert_invalid (module "\00asm\10\00\00\00") "unknown binary version") + From 5f2ebcc84900bc3515f24e825b0c88932c2d9674 Mon Sep 17 00:00:00 2001 From: rossberg-chromium Date: Wed, 27 Apr 2016 15:32:43 +0200 Subject: [PATCH 2/2] Output indices at type and function binders --- ml-proto/host/format.ml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ml-proto/host/format.ml b/ml-proto/host/format.ml index ffce4cecd2..5443d2591c 100644 --- a/ml-proto/host/format.ml +++ b/ml-proto/host/format.ml @@ -29,6 +29,7 @@ let string s = let list_of_opt = function None -> [] | Some x -> [x] let list f xs = List.map f xs +let listi f xs = List.mapi f xs let opt f xo = list f (list_of_opt xo) let tab head f xs = if xs = [] then [] else [Node (head, list f xs)] @@ -235,9 +236,9 @@ and block e = (* Functions *) -let func f = +let func i f = let {ftype; locals; body} = f.it in - Node ("func", + Node ("func $" ^ string_of_int i, [Node ("type " ^ var ftype, [])] @ decls "local" locals @ block body @@ -262,13 +263,15 @@ let memory mem = (* Modules *) -let typedef t = - Node ("type", [struct_type t]) +let typedef i t = + Node ("type $" ^ string_of_int i, [struct_type t]) -let import im = +let import i im = let {itype; module_name; func_name} = im.it in let ty = Node ("type " ^ var itype, []) in - Node ("import", [atom string module_name; atom string func_name; ty]) + Node ("import $" ^ string_of_int i, + [atom string module_name; atom string func_name; ty] + ) let export ex = let {name; kind} = ex.it in @@ -280,9 +283,9 @@ let export ex = let module_ m = Node ("module", - list typedef m.it.types @ - list import m.it.imports @ - list func m.it.funcs @ + listi typedef m.it.types @ + listi import m.it.imports @ + listi func m.it.funcs @ table m.it.table @ opt memory m.it.memory @ list export m.it.exports @