Skip to content

Commit a3acdae

Browse files
authored
More optimizations for pow of two and pos/neg one const on the right (#2870)
1 parent 721f158 commit a3acdae

11 files changed

+749
-318
lines changed

src/ir/abstract.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ enum Op {
4646
// Relational
4747
Eq,
4848
Ne,
49+
LtS,
50+
LtU,
51+
LeS,
52+
LeU,
53+
GtS,
54+
GtU,
55+
GeS,
56+
GeU
4957
};
5058

5159
// Provide a wasm type and an abstract op and get the concrete one. For example,
@@ -126,6 +134,22 @@ inline BinaryOp getBinary(Type type, Op op) {
126134
return EqInt32;
127135
case Ne:
128136
return NeInt32;
137+
case LtS:
138+
return LtSInt32;
139+
case LtU:
140+
return LtUInt32;
141+
case LeS:
142+
return LeSInt32;
143+
case LeU:
144+
return LeUInt32;
145+
case GtS:
146+
return GtSInt32;
147+
case GtU:
148+
return GtUInt32;
149+
case GeS:
150+
return GeSInt32;
151+
case GeU:
152+
return GeUInt32;
129153
default:
130154
return InvalidBinary;
131155
}
@@ -163,6 +187,22 @@ inline BinaryOp getBinary(Type type, Op op) {
163187
return EqInt64;
164188
case Ne:
165189
return NeInt64;
190+
case LtS:
191+
return LtSInt64;
192+
case LtU:
193+
return LtUInt64;
194+
case LeS:
195+
return LeSInt64;
196+
case LeU:
197+
return LeUInt64;
198+
case GtS:
199+
return GtSInt64;
200+
case GtU:
201+
return GtUInt64;
202+
case GeS:
203+
return GeSInt64;
204+
case GeU:
205+
return GeUInt64;
166206
default:
167207
return InvalidBinary;
168208
}

src/passes/OptimizeInstructions.cpp

Lines changed: 101 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
//
2020

2121
#include <algorithm>
22+
#include <type_traits>
2223

2324
#include <ir/abstract.h>
2425
#include <ir/cost.h>
@@ -578,10 +579,30 @@ struct OptimizeInstructions
578579
if (right->type == Type::i32) {
579580
uint32_t c = right->value.geti32();
580581
if (IsPowerOf2(c)) {
581-
if (binary->op == MulInt32) {
582-
return optimizePowerOf2Mul(binary, c);
583-
} else if (binary->op == RemUInt32) {
584-
return optimizePowerOf2URem(binary, c);
582+
switch (binary->op) {
583+
case MulInt32:
584+
return optimizePowerOf2Mul(binary, c);
585+
case RemUInt32:
586+
return optimizePowerOf2URem(binary, c);
587+
case DivUInt32:
588+
return optimizePowerOf2UDiv(binary, c);
589+
default:
590+
break;
591+
}
592+
}
593+
}
594+
if (right->type == Type::i64) {
595+
uint64_t c = right->value.geti64();
596+
if (IsPowerOf2(c)) {
597+
switch (binary->op) {
598+
case MulInt64:
599+
return optimizePowerOf2Mul(binary, c);
600+
case RemUInt64:
601+
return optimizePowerOf2URem(binary, c);
602+
case DivUInt64:
603+
return optimizePowerOf2UDiv(binary, c);
604+
default:
605+
break;
585606
}
586607
}
587608
}
@@ -1265,22 +1286,37 @@ struct OptimizeInstructions
12651286
// but it's still worth doing since
12661287
// * Often shifts are more common than muls.
12671288
// * The constant is smaller.
1268-
Expression* optimizePowerOf2Mul(Binary* binary, uint32_t c) {
1269-
uint32_t shifts = CountTrailingZeroes(c);
1270-
binary->op = ShlInt32;
1271-
binary->right->cast<Const>()->value = Literal(int32_t(shifts));
1289+
// Rewrites a multiply whose right operand is the power-of-two constant c as a
// left shift: the op becomes ShlInt32 / ShlInt64 and the constant on the right
// is replaced by CountTrailingZeroes(c).
// T must be uint32_t or uint64_t (enforced by the static_assert below) and
// selects between the 32-bit and 64-bit opcode.
// The binary is modified in place and returned.
// NOTE(review): assumes the caller has already checked IsPowerOf2(c) and that
// binary->right is a Const (cast<Const>() is applied unconditionally) - confirm.
template<typename T> Expression* optimizePowerOf2Mul(Binary* binary, T c) {
1290+
static_assert(std::is_same<T, uint32_t>::value ||
1291+
std::is_same<T, uint64_t>::value,
1292+
"type mismatch");
1293+
// The shift amount is the number of trailing zero bits of c.
auto shifts = CountTrailingZeroes<T>(c);
1294+
binary->op = std::is_same<T, uint32_t>::value ? ShlInt32 : ShlInt64;
1295+
// Cast back to T before building the Literal, presumably so the new constant
// keeps the same width as the operand type - TODO confirm against Literal.
binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts));
12721296
return binary;
12731297
}
12741298

1275-
// Optimize an unsigned divide by a power of two on the right,
1276-
// which can be an AND mask
1299+
// Optimize an unsigned divide / remainder by a power of two on the right: the divide becomes an unsigned right shift (shr_u) and the remainder becomes an AND mask.
12771300
// This doesn't shrink code size, and VMs likely optimize it anyhow,
12781301
// but it's still worth doing since
12791302
// * Usually ands are more common than urems.
12801303
// * The constant is slightly smaller.
1281-
Expression* optimizePowerOf2URem(Binary* binary, uint32_t c) {
1282-
binary->op = AndInt32;
1283-
binary->right->cast<Const>()->value = Literal(int32_t(c - 1));
1304+
// Rewrites an unsigned divide whose right operand is the power-of-two constant
// c as an unsigned right shift: the op becomes ShrUInt32 / ShrUInt64 and the
// constant on the right is replaced by CountTrailingZeroes(c)
// (e.g. a div_u by 65536 becomes a shr_u by 16).
// T must be uint32_t or uint64_t (enforced by the static_assert below) and
// selects between the 32-bit and 64-bit opcode.
// The binary is modified in place and returned.
// NOTE(review): assumes the caller has already checked IsPowerOf2(c) and that
// binary->right is a Const (cast<Const>() is applied unconditionally) - confirm.
template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) {
1305+
static_assert(std::is_same<T, uint32_t>::value ||
1306+
std::is_same<T, uint64_t>::value,
1307+
"type mismatch");
1308+
// The shift amount is the number of trailing zero bits of c.
auto shifts = CountTrailingZeroes<T>(c);
1309+
binary->op = std::is_same<T, uint32_t>::value ? ShrUInt32 : ShrUInt64;
1310+
// Cast back to T before building the Literal, presumably so the new constant
// keeps the same width as the operand type - TODO confirm against Literal.
binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts));
1311+
return binary;
1312+
}
1313+
1314+
// Rewrites an unsigned remainder whose right operand is the power-of-two
// constant c as a bitwise AND: the op becomes AndInt32 / AndInt64 and the
// constant on the right is replaced by the mask c - 1.
// T must be uint32_t or uint64_t (enforced by the static_assert below) and
// selects between the 32-bit and 64-bit opcode.
// The binary is modified in place and returned.
// NOTE(review): assumes the caller has already checked IsPowerOf2(c) and that
// binary->right is a Const (cast<Const>() is applied unconditionally) - confirm.
template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) {
1315+
static_assert(std::is_same<T, uint32_t>::value ||
1316+
std::is_same<T, uint64_t>::value,
1317+
"type mismatch");
1318+
binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64;
1319+
// For a power-of-two c, c - 1 has exactly the bits below c's single set bit,
// so masking with it keeps the unsigned remainder.
binary->right->cast<Const>()->value = Literal(c - 1);
12841320
return binary;
12851321
}
12861322

@@ -1327,8 +1363,9 @@ struct OptimizeInstructions
13271363
auto type = binary->right->type;
13281364
auto* right = binary->right->cast<Const>();
13291365
if (type.isInteger()) {
1366+
auto constRight = right->value.getInteger();
13301367
// operations on zero
1331-
if (right->value == Literal::makeFromInt32(0, type)) {
1368+
if (constRight == 0LL) {
13321369
if (binary->op == Abstract::getBinary(type, Abstract::Shl) ||
13331370
binary->op == Abstract::getBinary(type, Abstract::ShrU) ||
13341371
binary->op == Abstract::getBinary(type, Abstract::ShrS) ||
@@ -1344,16 +1381,62 @@ struct OptimizeInstructions
13441381
return Builder(*getModule()).makeUnary(EqZInt64, binary->left);
13451382
}
13461383
}
1384+
// operations on one
1385+
if (constRight == 1LL) {
1386+
// (signed)x % 1 ==> 0
1387+
if (binary->op == Abstract::getBinary(type, Abstract::RemS) &&
1388+
!EffectAnalyzer(getPassOptions(), features, binary->left)
1389+
.hasSideEffects()) {
1390+
right->value = Literal::makeSingleZero(type);
1391+
return right;
1392+
}
1393+
}
13471394
// operations on all 1s
1348-
// TODO: shortcut method to create an all-ones?
1349-
if (right->value == Literal(int32_t(-1)) ||
1350-
right->value == Literal(int64_t(-1))) {
1395+
if (constRight == -1LL) {
13511396
if (binary->op == Abstract::getBinary(type, Abstract::And)) {
1397+
// x & -1 ==> x
13521398
return binary->left;
13531399
} else if (binary->op == Abstract::getBinary(type, Abstract::Or) &&
13541400
!EffectAnalyzer(getPassOptions(), features, binary->left)
13551401
.hasSideEffects()) {
1402+
// x | -1 ==> -1
13561403
return binary->right;
1404+
} else if (binary->op == Abstract::getBinary(type, Abstract::RemS) &&
1405+
!EffectAnalyzer(getPassOptions(), features, binary->left)
1406+
.hasSideEffects()) {
1407+
// (signed)x % -1 ==> 0
1408+
right->value = Literal::makeSingleZero(type);
1409+
return right;
1410+
} else if (binary->op == Abstract::getBinary(type, Abstract::GtU) &&
1411+
!EffectAnalyzer(getPassOptions(), features, binary->left)
1412+
.hasSideEffects()) {
1413+
// (unsigned)x > -1 ==> 0
1414+
right->value = Literal::makeSingleZero(Type::i32);
1415+
right->type = Type::i32;
1416+
return right;
1417+
} else if (binary->op == Abstract::getBinary(type, Abstract::LtU)) {
1418+
// (unsigned)x < -1 ==> x != -1
1419+
// This is friendlier to JS emitting, as we don't need to write out an unsigned
1420+
// -1 value, which is large.
1421+
binary->op = Abstract::getBinary(type, Abstract::Ne);
1422+
return binary;
1423+
} else if (binary->op == DivUInt32) {
1424+
// (unsigned)x / -1 ==> x == -1
1425+
binary->op = Abstract::getBinary(type, Abstract::Eq);
1426+
return binary;
1427+
} else if (binary->op == Abstract::getBinary(type, Abstract::Mul)) {
1428+
// x * -1 ==> 0 - x
1429+
binary->op = Abstract::getBinary(type, Abstract::Sub);
1430+
right->value = Literal::makeSingleZero(type);
1431+
std::swap(binary->left, binary->right);
1432+
return binary;
1433+
} else if (binary->op == Abstract::getBinary(type, Abstract::LeU) &&
1434+
!EffectAnalyzer(getPassOptions(), features, binary->left)
1435+
.hasSideEffects()) {
1436+
// (unsigned)x <= -1 ==> 1
1437+
right->value = Literal::makeFromInt32(1, Type::i32);
1438+
right->type = Type::i32;
1439+
return right;
13571440
}
13581441
}
13591442
// wasm binary encoding uses signed LEBs, which slightly favor negative
@@ -1364,7 +1447,7 @@ struct OptimizeInstructions
13641447
// subtractions than the more common additions).
13651448
if (binary->op == Abstract::getBinary(type, Abstract::Add) ||
13661449
binary->op == Abstract::getBinary(type, Abstract::Sub)) {
1367-
auto value = right->value.getInteger();
1450+
auto value = constRight;
13681451
if (value == 0x40 || value == 0x2000 || value == 0x100000 ||
13691452
value == 0x8000000 || value == 0x400000000LL ||
13701453
value == 0x20000000000LL || value == 0x1000000000000LL ||

test/binaryen.js/sieve.js.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,12 @@ optimized:
7373
(drop
7474
(memory.grow
7575
(i32.sub
76-
(i32.div_u
76+
(i32.shr_u
7777
(i32.add
7878
(local.get $0)
7979
(i32.const 65535)
8080
)
81-
(i32.const 65536)
81+
(i32.const 16)
8282
)
8383
(memory.size)
8484
)

test/emcc_hello_world.fromasm

Lines changed: 31 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7118,78 +7118,48 @@
71187118
)
71197119
(func $_fmt_u (; has Stack IR ;) (param $0 i32) (param $1 i32) (param $2 i32) (result i32)
71207120
(local $3 i32)
7121-
(local $4 i32)
71227121
(if
7123-
(i32.or
7124-
(i32.and
7125-
(i32.eqz
7126-
(local.get $1)
7127-
)
7128-
(i32.gt_u
7129-
(local.get $0)
7130-
(i32.const -1)
7131-
)
7132-
)
7133-
(i32.gt_u
7134-
(local.get $1)
7135-
(i32.const 0)
7136-
)
7122+
(i32.gt_u
7123+
(local.get $1)
7124+
(i32.const 0)
71377125
)
7138-
(local.set $0
7139-
(loop $while-in (result i32)
7140-
(i32.store8
7141-
(local.tee $2
7142-
(i32.add
7143-
(local.get $2)
7144-
(i32.const -1)
7145-
)
7146-
)
7147-
(i32.or
7148-
(call $___uremdi3
7149-
(local.get $0)
7150-
(local.get $1)
7151-
(i32.const 10)
7152-
)
7153-
(i32.const 48)
7126+
(loop $while-in
7127+
(i32.store8
7128+
(local.tee $2
7129+
(i32.add
7130+
(local.get $2)
7131+
(i32.const -1)
71547132
)
71557133
)
7156-
(local.set $3
7157-
(call $___udivdi3
7134+
(i32.or
7135+
(call $___uremdi3
71587136
(local.get $0)
71597137
(local.get $1)
71607138
(i32.const 10)
71617139
)
7140+
(i32.const 48)
71627141
)
7163-
(local.set $4
7164-
(global.get $tempRet0)
7142+
)
7143+
(local.set $0
7144+
(call $___udivdi3
7145+
(local.get $0)
7146+
(local.get $1)
7147+
(i32.const 10)
71657148
)
7166-
(if (result i32)
7167-
(i32.or
7168-
(i32.and
7169-
(i32.eq
7170-
(local.get $1)
7171-
(i32.const 9)
7172-
)
7173-
(i32.gt_u
7174-
(local.get $0)
7175-
(i32.const -1)
7176-
)
7177-
)
7178-
(i32.gt_u
7179-
(local.get $1)
7180-
(i32.const 9)
7181-
)
7182-
)
7183-
(block
7184-
(local.set $0
7185-
(local.get $3)
7186-
)
7187-
(local.set $1
7188-
(local.get $4)
7189-
)
7190-
(br $while-in)
7149+
)
7150+
(local.set $3
7151+
(global.get $tempRet0)
7152+
)
7153+
(if
7154+
(i32.gt_u
7155+
(local.get $1)
7156+
(i32.const 9)
7157+
)
7158+
(block
7159+
(local.set $1
7160+
(local.get $3)
71917161
)
7192-
(local.get $3)
7162+
(br $while-in)
71937163
)
71947164
)
71957165
)

0 commit comments

Comments
 (0)