Skip to content

Stack machine semantics #323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 51 commits into from
Sep 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
5623731
Allow nullary blocks
rossberg Jun 22, 2016
c25d4ac
Allow n-ary loop
rossberg Jun 22, 2016
c062222
Allow n-ary if
rossberg Jun 22, 2016
97c2443
Allow n-ary func bodies
rossberg Jun 22, 2016
27f4e1b
Make return primitive
rossberg Jun 22, 2016
d23a769
Make memory operators primitive
rossberg Jun 22, 2016
0a6d707
Make loop's break label primitive
rossberg Jun 22, 2016
b155302
Stack kernel
rossberg Jul 8, 2016
a706df5
Adapt AST
rossberg Jul 8, 2016
f692d30
Supprt raw stack syntax
rossberg Jul 8, 2016
a227c7d
Tiny test of stack input
rossberg Jul 8, 2016
7200155
Merge branch 'binary-0xc' into stack
rossberg Jul 12, 2016
c7ed3f6
Adjust negative tests
rossberg Jul 12, 2016
72352ac
Remove break label from loops
rossberg Jul 12, 2016
64d4132
Make If block semantics primitive
rossberg Jul 12, 2016
96f233a
Reunify ASTs
rossberg Jul 12, 2016
8ca2e77
Clean up naming conventions
rossberg Jul 13, 2016
d8f7bd1
Remove some code duplication
rossberg Jul 13, 2016
2647de7
Adapt encoder
rossberg Jul 13, 2016
4dd20ef
Adjust text conversion
rossberg Jul 13, 2016
67718f7
Sketch of formal spec
rossberg Jul 13, 2016
21ceedf
Formal rules for calls, returns, locals
rossberg Jul 14, 2016
7353bc6
Convert calls to small-step
rossberg Jul 15, 2016
dd5dd72
New tests for stack machine
rossberg Jul 18, 2016
90e7a47
Dead code is dead to the spec
rossberg Jul 18, 2016
8a04907
Don't type unreachable operators; simplify typing
rossberg Aug 4, 2016
a0d777c
Clean up arity checking
rossberg Aug 5, 2016
1b364d9
Merge branch 'binary-0xc' into stack
rossberg Aug 16, 2016
da5f917
Merge branch 'binary-0xc' into stack
rossberg Aug 16, 2016
d7f4d02
Tweak S-expr grammar
rossberg Aug 23, 2016
6e8d71e
Eliminate second loop label
rossberg Aug 24, 2016
7b9f237
Tweak
rossberg Aug 24, 2016
d924eff
Tweak
rossberg Aug 24, 2016
4dc09e3
Simplify memop
rossberg Aug 24, 2016
cf2d51d
Rename expressions to instructions
rossberg Aug 24, 2016
812b5a0
Added todos
rossberg Aug 24, 2016
527d36e
Merge branch 'binary-0xc' into stack
rossberg Aug 24, 2016
ceec18c
Merge branch 'binary-0xc' into stack
rossberg Aug 24, 2016
4a63a5c
Tweak error message
rossberg Aug 30, 2016
0acc8b0
Numeric section names (PR 740)
rossberg Sep 2, 2016
2482e44
Move element section before code section (PR 779)
rossberg Sep 2, 2016
fb3be97
Require END opcode for functions; simplify streams
rossberg Sep 2, 2016
ab65e4d
Check length & value of var(u)ints
rossberg Sep 5, 2016
025fa0b
Abstract length limit
rossberg Sep 5, 2016
cddb36b
Make variables i32
rossberg Sep 5, 2016
9e4b6a9
Remove kernel.ml
rossberg Sep 8, 2016
5440555
Eliminate administrative expressions, in preparation of import/export…
rossberg Sep 8, 2016
de0af62
Merge branch 'binary-0xc' into stack; resolve many conflicts with imp…
rossberg Sep 9, 2016
f99e5ac
Fix remaining merge fall-out
rossberg Sep 9, 2016
47420c8
Merge branch 'mo-0xc' into stack
rossberg Sep 9, 2016
a7a5fef
Merge branch 'binary-0xc' into stack
rossberg Sep 9, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 58 additions & 41 deletions ml-proto/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,39 +116,54 @@ unop: ctz | clz | popcnt | ...
binop: add | sub | mul | ...
relop: eq | ne | lt | ...
sign: s|u
offset: offset=<uint>
offset: offset=<nat>
align: align=(1|2|4|8|...)
cvtop: trunc_s | trunc_u | extend_s | extend_u | ...

expr:
( nop )
( block <name>? <expr>* )
( loop <name>? <name>? <expr>* ) ;; = (block <name>? (loop <name>? (block <expr>*)))
( select <expr> <expr> <expr> )
( if <expr> ( then <name>? <expr>* ) ( else <name>? <expr>* )? )
( if <expr> <expr> <expr>? ) ;; = (if <expr> (then <expr>) (else <expr>?))
( br <var> <expr>? )
( br_if <var> <expr>? <expr> )
( br_table <var> <var> <expr>? <expr> )
( return <expr>? ) ;; = (br <current_depth> <expr>?)
( call <var> <expr>* )
( call_import <var> <expr>* )
( call_indirect <var> <expr> <expr>* )
( get_local <var> )
( set_local <var> <expr> )
( <type>.load((8|16|32)_<sign>)? <offset>? <align>? <expr> )
( <type>.store(8|16|32)? <offset>? <align>? <expr> <expr> )
( <type>.const <value> )
( <type>.<unop> <expr> )
( <type>.<binop> <expr> <expr> )
( <type>.<relop> <expr> <expr> )
( <type>.<cvtop>/<type> <expr> )
( unreachable )
( current_memory )
( grow_memory <expr> )

func: ( func <name>? <func_sig> <local>* <expr>* )
( func <name>? ( export <string> ) <func_sig> <local>* <expr>* ) ;; = (export <string> (func <N>) (func <name>? <func_sig> <local>* <expr>*)
( <op> )
( <op> <expr>+ ) ;; = <expr>+ (<op>)
( block <name>? <instr>* )
( loop <name>? <instr>* )
( if ( then <name>? <instr>* ) ( else <name>? <instr>* )? )
( if <expr> ( then <name>? <instr>* ) ( else <name>? <instr>* )? ) ;; = (if <expr> (then <name>? <instr>*) (else <name>? <instr>*)?)
( if <expr> <expr> <expr>? ) ;; = (if <expr> (then <expr>) (else <expr>?))

instr:
<expr>
<op> ;; = (<op>)
block <name>? <instr>* end ;; = (block <name>? <instr>*)
loop <name>? <instr>* end ;; = (loop <name>? <instr>*)
if <name>? <instr>* end ;; = (if (then <name>? <instr>*))
if <name>? <instr>* else <name>? <instr>* end ;; = (if (then <name>? instr>*) (else <name>? <instr>*))

op:
unreachable
nop
drop
select
br <var>
br_if <var>
br_table <var>+
return
call <var>
call_indirect <var>
get_local <var>
set_local <var>
tee_local <var>
<type>.const <value>
<type>.<unop>
<type>.<binop>
<type>.<testop>
<type>.<relop>
<type>.<cvtop>/<type>
<type>.load((8|16|32)_<sign>)? <offset>? <align>?
<type>.store(8|16|32)? <offset>? <align>?
current_memory
grow_memory

func: ( func <name>? <func_sig> <local>* <instr>* )
( func <name>? ( export <string> ) <func_sig> <local>* <instrr>* ) ;; = (export <string> (func <N>) (func <name>? <func_sig> <local>* <instr>*)
( func <name>? ( import <string> <string> ) <func_sig>) ;; = (import <name>? <string> <string> (func <func_sig>))
param: ( param <type>* ) | ( param <name> <type> )
result: ( result <type> )
Expand All @@ -166,12 +181,14 @@ table: ( table <name>? <table_sig> )
( table <name>? ( export <string> ) <table_sig> ) ;; = (export <string> (table <N>)) (table <name>? <table_sig>)
( table <name>? ( import <string> <string> ) <table_sig> ) ;; = (import <name>? <string> <string> (table <table_sig>))
( table <name>? ( export <string> )? <elem_type> ( elem <var>* ) ) ;; = (table <name>? ( export <string> )? <size> <size> <elem_type>) (elem (i32.const 0) <var>*)
elem: ( elem <expr> <var>* )
elem: ( elem <var>? (offset <instr>* ) <var>* )
( elem <var>? <expr> <var>* ) ;; = (elem <var>? (offset <expr>) <var>*)
memory: ( memory <name>? <memory_sig> )
( memory <name>? ( export <string> ) <memory_sig> ) ;; = (export <string> (memory <N>)) (memory <name>? <memory_sig>)
( memory <name>? ( import <string> <string> ) <memory_sig> ) ;; = (import <name>? <string> <string> (memory <memory_sig>))
( memory <name>? ( export <string> )? ( data <string>* ) ;; = (memory <name>? ( export <string> )? <size> <size>) (data (i32.const 0) <string>*)
data: ( data <expr> <string>* )
data: ( data <var>? ( offset <instr>* ) <string>* )
( data <var>? <expr> <string>* ) ;; = (data <var>? (offset <expr>) <string>*)

start: ( start <var> )

Expand All @@ -192,7 +209,9 @@ module: ( module <name>? <typedef>* <func>* <import>* <export>* <table>? <memor
( module <name>? <string>+ )
```

Here, productions marked with respective comments are abbreviation forms for equivalent expansions (see the explanation of the kernel AST below).
Here, productions marked with respective comments are abbreviation forms for equivalent expansions (see the explanation of the AST below).
In particular, WebAssembly is a stack machine, so that all expressions of the form `(<op> <expr>+)` are merely abbreviations of a corresponding post-order sequence of instructions.
For raw instructions, the syntax allows omitting the parentheses around the operator name and its immediate operands. In the case of control operators (`block`, `loop`, `if`), this requires marking the end of the nested sequence with an explicit `end` keyword.

Any form of naming via `<name>` and `<var>` (including expression labels) is merely notational convenience of this text format. The actual AST has no names, and all bindings are referred to via ordered numeric indices; consequently, names are immediately resolved in the parser and replaced by indices. Indices can also be used directly in the text format.

Expand Down Expand Up @@ -223,7 +242,7 @@ cmd:
<module> ;; define, validate, and initialize module
<action> ;; perform action and print results
( register <string> <name>? ) ;; register module for imports
( assert_return <action> <expr>? ) ;; assert action has expected results
( assert_return <action> <expr>* ) ;; assert action has expected results
( assert_return_nan <action> ) ;; assert action results in NaN
( assert_trap <action> <failure> ) ;; assert action traps with given failure string
( assert_invalid <module> <failure> ) ;; assert module is invalid with given failure string
Expand All @@ -247,13 +266,11 @@ Again, this is only a meta-level for testing, and not a part of the language pro
The interpreter also supports a "dry" mode (flag `-d`), in which modules are only validated. In this mode, `invoke` commands are ignored (and not needed).


## Abstract Syntax and Kernel Syntax
## Abstract Syntax

The abstract WebAssembly syntax, as described above and in the [design doc](https://github.com/WebAssembly/design/blob/master/AstSemantics.md), is defined in [ast.ml](https://github.com/WebAssembly/spec/blob/master/ml-proto/spec/ast.ml).

However, to simplify the implementation, this AST representation is first "desugared" into a more minimal <i>kernel</i> language that is a subset of the full language. For example, conditionals with no else-branch are desugared into conditionals with `nop` for their else-branch, such that in the kernel language, all conditionals have two branches. The desugaring rules are sketched in the comments of the S-expression grammar given above.

The representation for that kernel language AST is defined in [kernel.ml](https://github.com/WebAssembly/spec/blob/master/ml-proto/spec/kernel.ml). Besides having fewer constructs, it also raises the level of abstraction further, e.g., by grouping related operators, or decomposing the syntactic structure of operators themselves.
However, to simplify the implementation, this AST representation represents some of the inner structure of the operators more explicitly. The mapping from the operators as given in the design doc to their structured form is defined in [operators.ml](https://github.com/WebAssembly/spec/blob/master/ml-proto/spec/operators.ml).


## Implementation
Expand All @@ -268,17 +285,17 @@ The implementation is split into three directories:

The implementation consists of the following parts:

* *Abstract Syntax* (`ast.ml`, `kernel.ml`, `types.ml`, `source.ml[i]`). Notably, the `phrase` wrapper type around each AST node carries the source position information.
* *Abstract Syntax* (`ast.ml`, `operators.ml`, `types.ml`, `source.ml[i]`). Notably, the `phrase` wrapper type around each AST node carries the source position information.

* *Parser* (`lexer.mll`, `parser.mly`, `desguar.ml[i]`). Generated with ocamllex and ocamlyacc. The lexer does the opcode encoding (non-trivial tokens carry e.g. type information as semantic values, as declared in `parser.mly`), the parser the actual S-expression parsing. The parser generates a full AST that is desugared into the kernel AST in a separate pass.
* *Parser* (`lexer.mll`, `parser.mly`). Generated with ocamllex and ocamlyacc. The lexer does the opcode encoding (non-trivial tokens carry e.g. type information as semantic values, as declared in `parser.mly`), the parser the actual S-expression parsing.

* *Pretty Printer* (`arrange.ml[i]`, `sexpr.ml[i]`). Turns a module AST back into the textual S-expression format.

* *Decoder*/*Encoder* (`decode.ml[i]`, `encode.ml[i]`). The former parses the binary format and turns it into an AST, the latter does the inverse.

* *Validator* (`check.ml[i]`). Does a recursive walk of the kernel AST, passing down the *expected* type for expressions, and checking each expression against that. An expected empty type can be matched by any result, corresponding to implicit dropping of unused values (e.g. in a block).
* *Validator* (`check.ml[i]`). Does a recursive walk of the AST, passing down the *expected* type for expressions, and checking each expression against that. An expected empty type can be matched by any result, corresponding to implicit dropping of unused values (e.g. in a block).

* *Evaluator* (`eval.ml[i]`, `values.ml`, `arithmetic.ml[i]`, `int.ml`, `float.ml`, `memory.ml[i]`, and a few more). Evaluation of control transfer (`br` and `return`) is implemented using local exceptions as "labels". While these are allocated dynamically in the code and addressed via a stack, that is merely to simplify the code. In reality, these would be static jumps.
* *Evaluator* (`eval.ml[i]`, `values.ml`, `eval_numeric.ml[i]`, `int.ml`, `float.ml`, `memory.ml[i]`, and a few more). Evaluation of control transfer (`br` and `return`) is implemented using local exceptions as "labels". While these are allocated dynamically in the code and addressed via a stack, that is merely to simplify the code. In reality, these would be static jumps.

* *Driver* (`main.ml`, `run.ml[i]`, `script.ml[i]`, `error.ml`, `print.ml[i]`, `flags.ml`). Executes scripts, reports results or errors, etc.

Expand Down
38 changes: 34 additions & 4 deletions ml-proto/given/lib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@ struct
let rec make n x =
if n = 0 then [] else x :: make (n - 1) x

let rec table n f = table' 0 n f
and table' i n f =
if i = n then [] else f i :: table' (i + 1) n f

let rec take n xs =
match n, xs with
| 0, _ -> []
| n, x::xs' when n > 0 -> x :: take (n - 1) xs'
| _ -> failwith "drop"
| _ -> failwith "take"

let rec drop n xs =
match n, xs with
Expand All @@ -31,16 +35,42 @@ struct
| x::xs -> let ys, y = split_last xs in x::ys, y
| [] -> failwith "split_last"

let rec index_of x xs =
index_of' x xs 0

let rec index_of x xs = index_of' x xs 0
and index_of' x xs i =
match xs with
| [] -> None
| y::xs' when x = y -> Some i
| y::xs' -> index_of' x xs' (i+1)
end

module List32 =
struct
let rec length xs = length' xs 0l
and length' xs n =
match xs with
| [] -> n
| _::xs' when n < Int32.max_int -> length' xs' (Int32.add n 1l)
| _ -> failwith "length"

let rec nth xs n =
match n, xs with
| 0l, x::_ -> x
| n, _::xs' when n > 0l -> nth xs' (Int32.sub n 1l)
| _ -> failwith "nth"

let rec take n xs =
match n, xs with
| 0l, _ -> []
| n, x::xs' when n > 0l -> x :: take (Int32.sub n 1l) xs'
| _ -> failwith "take"

let rec drop n xs =
match n, xs with
| 0l, _ -> xs
| n, _::xs' when n > 0l -> drop (Int32.sub n 1l) xs'
| _ -> failwith "drop"
end

module Option =
struct
let get o x =
Expand Down
13 changes: 11 additions & 2 deletions ml-proto/given/lib.mli
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,24 @@ end
module List :
sig
val make : int -> 'a -> 'a list
val take : int -> 'a list -> 'a list
val drop : int -> 'a list -> 'a list
val table : int -> (int -> 'a) -> 'a list
val take : int -> 'a list -> 'a list (* raise Failure *)
val drop : int -> 'a list -> 'a list (* raise Failure *)

val last : 'a list -> 'a (* raise Failure *)
val split_last : 'a list -> 'a list * 'a (* raise Failure *)

val index_of : 'a -> 'a list -> int option
end

module List32 :
sig
val length : 'a list -> int32
val nth : 'a list -> int32 -> 'a (* raise Failure *)
val take : int32 -> 'a list -> 'a list (* raise Failure *)
val drop : int32 -> 'a list -> 'a list (* raise Failure *)
end

module Option :
sig
val get : 'a option -> 'a -> 'a
Expand Down
Loading