WebAssembly
diff --git a/‎src/ir/abstract.h
Lines changed: 40 additions & 0 deletions b/‎src/ir/abstract.h
Lines changed: 40 additions & 0 deletions
diff --git a/‎src/passes/OptimizeInstructions.cpp
Lines changed: 101 additions & 18 deletions b/‎src/passes/OptimizeInstructions.cpp
Lines changed: 101 additions & 18 deletions
diff --git a/‎test/binaryen.js/sieve.js.txt
Lines changed: 2 additions & 2 deletions b/‎test/binaryen.js/sieve.js.txt
Lines changed: 2 additions & 2 deletions
diff --git a/‎test/emcc_hello_world.fromasm
Lines changed: 31 additions & 61 deletions b/‎test/emcc_hello_world.fromasm
Lines changed: 31 additions & 61 deletions
@@ -46,6 +46,14 @@ enum Op {
   // Relational
   Eq,
   Ne,
+  LtS,
+  LtU,
+  LeS,
+  LeU,
+  GtS,
+  GtU,
+  GeS,
+  GeU
 };
 
 // Provide a wasm type and an abstract op and get the concrete one. For example,
@@ -126,6 +134,22 @@ inline BinaryOp getBinary(Type type, Op op) {
           return EqInt32;
         case Ne:
           return NeInt32;
+        case LtS:
+          return LtSInt32;
+        case LtU:
+          return LtUInt32;
+        case LeS:
+          return LeSInt32;
+        case LeU:
+          return LeUInt32;
+        case GtS:
+          return GtSInt32;
+        case GtU:
+          return GtUInt32;
+        case GeS:
+          return GeSInt32;
+        case GeU:
+          return GeUInt32;
         default:
           return InvalidBinary;
       }
@@ -163,6 +187,22 @@ inline BinaryOp getBinary(Type type, Op op) {
           return EqInt64;
         case Ne:
           return NeInt64;
+        case LtS:
+          return LtSInt64;
+        case LtU:
+          return LtUInt64;
+        case LeS:
+          return LeSInt64;
+        case LeU:
+          return LeUInt64;
+        case GtS:
+          return GtSInt64;
+        case GtU:
+          return GtUInt64;
+        case GeS:
+          return GeSInt64;
+        case GeU:
+          return GeUInt64;
         default:
           return InvalidBinary;
       }
 
@@ -19,6 +19,7 @@
 //
 
 #include <algorithm>
+#include <type_traits>
 
 #include <ir/abstract.h>
 #include <ir/cost.h>
@@ -578,10 +579,30 @@ struct OptimizeInstructions
         if (right->type == Type::i32) {
           uint32_t c = right->value.geti32();
           if (IsPowerOf2(c)) {
-            if (binary->op == MulInt32) {
-              return optimizePowerOf2Mul(binary, c);
-            } else if (binary->op == RemUInt32) {
-              return optimizePowerOf2URem(binary, c);
+            switch (binary->op) {
+              case MulInt32:
+                return optimizePowerOf2Mul(binary, c);
+              case RemUInt32:
+                return optimizePowerOf2URem(binary, c);
+              case DivUInt32:
+                return optimizePowerOf2UDiv(binary, c);
+              default:
+                break;
+            }
+          }
+        }
+        if (right->type == Type::i64) {
+          uint64_t c = right->value.geti64();
+          if (IsPowerOf2(c)) {
+            switch (binary->op) {
+              case MulInt64:
+                return optimizePowerOf2Mul(binary, c);
+              case RemUInt64:
+                return optimizePowerOf2URem(binary, c);
+              case DivUInt64:
+                return optimizePowerOf2UDiv(binary, c);
+              default:
+                break;
             }
           }
         }
@@ -1265,22 +1286,37 @@ struct OptimizeInstructions
   // but it's still worth doing since
   //  * Often shifts are more common than muls.
   //  * The constant is smaller.
-  Expression* optimizePowerOf2Mul(Binary* binary, uint32_t c) {
-    uint32_t shifts = CountTrailingZeroes(c);
-    binary->op = ShlInt32;
-    binary->right->cast<Const>()->value = Literal(int32_t(shifts));
+  template<typename T> Expression* optimizePowerOf2Mul(Binary* binary, T c) {
+    static_assert(std::is_same<T, uint32_t>::value ||
+                    std::is_same<T, uint64_t>::value,
+                  "type mismatch");
+    auto shifts = CountTrailingZeroes<T>(c);
+    binary->op = std::is_same<T, uint32_t>::value ? ShlInt32 : ShlInt64;
+    binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts));
     return binary;
   }
 
-  // Optimize an unsigned divide by a power of two on the right,
-  // which can be an AND mask
+  // Optimize an unsigned divide / remainder by a power of two on the right
   // This doesn't shrink code size, and VMs likely optimize it anyhow,
   // but it's still worth doing since
   //  * Usually ands are more common than urems.
   //  * The constant is slightly smaller.
-  Expression* optimizePowerOf2URem(Binary* binary, uint32_t c) {
-    binary->op = AndInt32;
-    binary->right->cast<Const>()->value = Literal(int32_t(c - 1));
+  template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) {
+    static_assert(std::is_same<T, uint32_t>::value ||
+                    std::is_same<T, uint64_t>::value,
+                  "type mismatch");
+    auto shifts = CountTrailingZeroes<T>(c);
+    binary->op = std::is_same<T, uint32_t>::value ? ShrUInt32 : ShrUInt64;
+    binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts));
+    return binary;
+  }
+
+  template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) {
+    static_assert(std::is_same<T, uint32_t>::value ||
+                    std::is_same<T, uint64_t>::value,
+                  "type mismatch");
+    binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64;
+    binary->right->cast<Const>()->value = Literal(c - 1);
     return binary;
   }
 
@@ -1327,8 +1363,9 @@ struct OptimizeInstructions
     auto type = binary->right->type;
     auto* right = binary->right->cast<Const>();
     if (type.isInteger()) {
+      auto constRight = right->value.getInteger();
       // operations on zero
-      if (right->value == Literal::makeFromInt32(0, type)) {
+      if (constRight == 0LL) {
         if (binary->op == Abstract::getBinary(type, Abstract::Shl) ||
             binary->op == Abstract::getBinary(type, Abstract::ShrU) ||
             binary->op == Abstract::getBinary(type, Abstract::ShrS) ||
@@ -1344,16 +1381,62 @@ struct OptimizeInstructions
           return Builder(*getModule()).makeUnary(EqZInt64, binary->left);
         }
       }
+      // operations on one
+      if (constRight == 1LL) {
+        // (signed)x % 1   ==>   0
+        if (binary->op == Abstract::getBinary(type, Abstract::RemS) &&
+            !EffectAnalyzer(getPassOptions(), features, binary->left)
+               .hasSideEffects()) {
+          right->value = Literal::makeSingleZero(type);
+          return right;
+        }
+      }
       // operations on all 1s
-      // TODO: shortcut method to create an all-ones?
-      if (right->value == Literal(int32_t(-1)) ||
-          right->value == Literal(int64_t(-1))) {
+      if (constRight == -1LL) {
         if (binary->op == Abstract::getBinary(type, Abstract::And)) {
+          // x & -1   ==>   x
           return binary->left;
         } else if (binary->op == Abstract::getBinary(type, Abstract::Or) &&
                    !EffectAnalyzer(getPassOptions(), features, binary->left)
                       .hasSideEffects()) {
+          // x | -1   ==>   -1
           return binary->right;
+        } else if (binary->op == Abstract::getBinary(type, Abstract::RemS) &&
+                   !EffectAnalyzer(getPassOptions(), features, binary->left)
+                      .hasSideEffects()) {
+          // (signed)x % -1     ==>   0
+          right->value = Literal::makeSingleZero(type);
+          return right;
+        } else if (binary->op == Abstract::getBinary(type, Abstract::GtU) &&
+                   !EffectAnalyzer(getPassOptions(), features, binary->left)
+                      .hasSideEffects()) {
+          // (unsigned)x > -1   ==>   0
+          right->value = Literal::makeSingleZero(Type::i32);
+          right->type = Type::i32;
+          return right;
+        } else if (binary->op == Abstract::getBinary(type, Abstract::LtU)) {
+          // (unsigned)x < -1   ==>   x != -1
+          // friendlier to JS emitting as we don't need to write an unsigned
+          // -1 value which is large.
+          binary->op = Abstract::getBinary(type, Abstract::Ne);
+          return binary;
+        } else if (binary->op == DivUInt32) {
+          // (unsigned)x / -1   ==>   x == -1
+          binary->op = Abstract::getBinary(type, Abstract::Eq);
+          return binary;
+        } else if (binary->op == Abstract::getBinary(type, Abstract::Mul)) {
+          // x * -1   ==>   0 - x
+          binary->op = Abstract::getBinary(type, Abstract::Sub);
+          right->value = Literal::makeSingleZero(type);
+          std::swap(binary->left, binary->right);
+          return binary;
+        } else if (binary->op == Abstract::getBinary(type, Abstract::LeU) &&
+                   !EffectAnalyzer(getPassOptions(), features, binary->left)
+                      .hasSideEffects()) {
+          // (unsigned)x <= -1   ==>   1
+          right->value = Literal::makeFromInt32(1, Type::i32);
+          right->type = Type::i32;
+          return right;
         }
       }
       // wasm binary encoding uses signed LEBs, which slightly favor negative
@@ -1364,7 +1447,7 @@ struct OptimizeInstructions
       // subtractions than the more common additions).
       if (binary->op == Abstract::getBinary(type, Abstract::Add) ||
           binary->op == Abstract::getBinary(type, Abstract::Sub)) {
-        auto value = right->value.getInteger();
+        auto value = constRight;
         if (value == 0x40 || value == 0x2000 || value == 0x100000 ||
             value == 0x8000000 || value == 0x400000000LL ||
             value == 0x20000000000LL || value == 0x1000000000000LL ||
 
@@ -73,12 +73,12 @@ optimized:
    (drop
     (memory.grow
      (i32.sub
-      (i32.div_u
+      (i32.shr_u
        (i32.add
         (local.get $0)
         (i32.const 65535)
        )
-       (i32.const 65536)
+       (i32.const 16)
       )
       (memory.size)
      )
 
@@ -7118,78 +7118,48 @@
  )
  (func $_fmt_u (; has Stack IR ;) (param $0 i32) (param $1 i32) (param $2 i32) (result i32)
   (local $3 i32)
-  (local $4 i32)
   (if
-   (i32.or
-    (i32.and
-     (i32.eqz
-      (local.get $1)
-     )
-     (i32.gt_u
-      (local.get $0)
-      (i32.const -1)
-     )
-    )
-    (i32.gt_u
-     (local.get $1)
-     (i32.const 0)
-    )
+   (i32.gt_u
+    (local.get $1)
+    (i32.const 0)
    )
-   (local.set $0
-    (loop $while-in (result i32)
-     (i32.store8
-      (local.tee $2
-       (i32.add
-        (local.get $2)
-        (i32.const -1)
-       )
-      )
-      (i32.or
-       (call $___uremdi3
-        (local.get $0)
-        (local.get $1)
-        (i32.const 10)
-       )
-       (i32.const 48)
+   (loop $while-in
+    (i32.store8
+     (local.tee $2
+      (i32.add
+       (local.get $2)
+       (i32.const -1)
       )
      )
-     (local.set $3
-      (call $___udivdi3
+     (i32.or
+      (call $___uremdi3
        (local.get $0)
        (local.get $1)
        (i32.const 10)
       )
+      (i32.const 48)
      )
-     (local.set $4
-      (global.get $tempRet0)
+    )
+    (local.set $0
+     (call $___udivdi3
+      (local.get $0)
+      (local.get $1)
+      (i32.const 10)
      )
-     (if (result i32)
-      (i32.or
-       (i32.and
-        (i32.eq
-         (local.get $1)
-         (i32.const 9)
-        )
-        (i32.gt_u
-         (local.get $0)
-         (i32.const -1)
-        )
-       )
-       (i32.gt_u
-        (local.get $1)
-        (i32.const 9)
-       )
-      )
-      (block
-       (local.set $0
-        (local.get $3)
-       )
-       (local.set $1
-        (local.get $4)
-       )
-       (br $while-in)
+    )
+    (local.set $3
+     (global.get $tempRet0)
+    )
+    (if
+     (i32.gt_u
+      (local.get $1)
+      (i32.const 9)
+     )
+     (block
+      (local.set $1
+       (local.get $3)
       )
-      (local.get $3)
+      (br $while-in)
      )
     )
    )
Original file line number	Diff line number	Diff line change
`@@ -73,12 +73,12 @@ optimized:`
`73`	`73`	`(drop`
`74`	`74`	`(memory.grow`
`75`	`75`	`(i32.sub`
`76`		`- (i32.div_u`
	`76`	`+ (i32.shr_u`
`77`	`77`	`(i32.add`
`78`	`78`	`(local.get $0)`
`79`	`79`	`(i32.const 65535)`
`80`	`80`	`)`
`81`		`- (i32.const 65536)`
	`81`	`+ (i32.const 16)`
`82`	`82`	`)`
`83`	`83`	`(memory.size)`
`84`	`84`	`)`