Skip to content

Commit 72dc26e

Browse files
MaxGraey authored and dcodeIO committed
Optimize builtin abs for integers (#420)
1 parent 2aa5c43 commit 72dc26e

File tree

3 files changed

+866
-854
lines changed

3 files changed

+866
-854
lines changed

src/builtins.ts

Lines changed: 61 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -640,55 +640,80 @@ export function compileCall(
640640
case TypeKind.I8:
641641
case TypeKind.I16:
642642
case TypeKind.I32: {
643+
let currentFunction = compiler.currentFunction;
644+
643645
// possibly overflows, e.g. abs<i8>(-128) == 128
644-
let tempLocal = compiler.currentFunction.getAndFreeTempLocal(Type.i32, false);
645-
ret = module.createSelect( // x > 0 ? x : 0-x
646-
module.createTeeLocal(tempLocal.index, arg0),
647-
module.createBinary(BinaryOp.SubI32, // ifFalse
648-
module.createI32(0),
649-
module.createGetLocal(tempLocal.index, NativeType.I32)
646+
let tempLocal1 = currentFunction.getTempLocal(Type.i32, false);
647+
let tempLocalIndex2 = currentFunction.getAndFreeTempLocal(Type.i32, false).index;
648+
let tempLocalIndex1 = tempLocal1.index;
649+
650+
// (x + (x >> 31)) ^ (x >> 31)
651+
ret = module.createBinary(BinaryOp.XorI32,
652+
module.createBinary(BinaryOp.AddI32,
653+
module.createTeeLocal(
654+
tempLocalIndex2,
655+
module.createBinary(BinaryOp.ShrI32,
656+
module.createTeeLocal(tempLocalIndex1, arg0),
657+
module.createI32(31)
658+
)
659+
),
660+
module.createGetLocal(tempLocalIndex1, NativeType.I32)
650661
),
651-
module.createBinary(BinaryOp.GtI32,
652-
module.createGetLocal(tempLocal.index, NativeType.I32),
653-
module.createI32(0)
654-
)
662+
module.createGetLocal(tempLocalIndex2, NativeType.I32)
655663
);
664+
665+
currentFunction.freeTempLocal(tempLocal1);
656666
break;
657667
}
658668
case TypeKind.ISIZE: {
659-
let tempLocal = compiler.currentFunction.getAndFreeTempLocal(compiler.options.usizeType, false);
660-
ret = module.createSelect(
661-
module.createTeeLocal(tempLocal.index, arg0),
662-
module.createBinary(
663-
compiler.options.isWasm64
664-
? BinaryOp.SubI64
665-
: BinaryOp.SubI32,
666-
compiler.options.usizeType.toNativeZero(module),
667-
module.createGetLocal(tempLocal.index, compiler.options.nativeSizeType)
669+
let options = compiler.options;
670+
let currentFunction = compiler.currentFunction;
671+
let wasm64 = options.isWasm64;
672+
673+
let tempLocal1 = currentFunction.getTempLocal(options.usizeType, false);
674+
let tempLocalIndex2 = currentFunction.getAndFreeTempLocal(options.usizeType, false).index;
675+
let tempLocalIndex1 = tempLocal1.index;
676+
677+
ret = module.createBinary(wasm64 ? BinaryOp.XorI64 : BinaryOp.XorI32,
678+
module.createBinary(wasm64 ? BinaryOp.AddI64 : BinaryOp.AddI32,
679+
module.createTeeLocal(
680+
tempLocalIndex2,
681+
module.createBinary(wasm64 ? BinaryOp.ShrI64 : BinaryOp.ShrI32,
682+
module.createTeeLocal(tempLocalIndex1, arg0),
683+
wasm64 ? module.createI64(63) : module.createI32(31)
684+
)
685+
),
686+
module.createGetLocal(tempLocalIndex1, options.nativeSizeType)
668687
),
669-
module.createBinary(
670-
compiler.options.isWasm64
671-
? BinaryOp.GtI64
672-
: BinaryOp.GtI32,
673-
module.createGetLocal(tempLocal.index, compiler.options.nativeSizeType),
674-
compiler.options.usizeType.toNativeZero(module)
675-
)
688+
module.createGetLocal(tempLocalIndex2, options.nativeSizeType)
676689
);
690+
691+
currentFunction.freeTempLocal(tempLocal1);
677692
break;
678693
}
679694
case TypeKind.I64: {
680-
let tempLocal = compiler.currentFunction.getAndFreeTempLocal(Type.i64, false);
681-
ret = module.createSelect(
682-
module.createTeeLocal(tempLocal.index, arg0),
683-
module.createBinary(BinaryOp.SubI64,
684-
module.createI64(0, 0),
685-
module.createGetLocal(tempLocal.index, NativeType.I64),
695+
let currentFunction = compiler.currentFunction;
696+
697+
let tempLocal1 = currentFunction.getTempLocal(Type.i64, false);
698+
let tempLocalIndex2 = currentFunction.getAndFreeTempLocal(Type.i64, false).index;
699+
let tempLocalIndex1 = tempLocal1.index;
700+
701+
// (x + (x >> 63)) ^ (x >> 63)
702+
ret = module.createBinary(BinaryOp.XorI64,
703+
module.createBinary(BinaryOp.AddI64,
704+
module.createTeeLocal(
705+
tempLocalIndex2,
706+
module.createBinary(BinaryOp.ShrI64,
707+
module.createTeeLocal(tempLocalIndex1, arg0),
708+
module.createI64(63)
709+
)
710+
),
711+
module.createGetLocal(tempLocalIndex1, NativeType.I64)
686712
),
687-
module.createBinary(BinaryOp.GtI64,
688-
module.createGetLocal(tempLocal.index, NativeType.I64),
689-
module.createI64(0, 0)
690-
)
713+
module.createGetLocal(tempLocalIndex2, NativeType.I64)
691714
);
715+
716+
currentFunction.freeTempLocal(tempLocal1);
692717
break;
693718
}
694719
case TypeKind.USIZE: {

tests/compiler/builtins.untouched.wat

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -318,13 +318,13 @@
318318
drop
319319
i32.const -42
320320
tee_local $0
321-
i32.const 0
322-
get_local $0
323-
i32.sub
321+
i32.const 31
322+
i32.shr_s
323+
tee_local $1
324324
get_local $0
325-
i32.const 0
326-
i32.gt_s
327-
select
325+
i32.add
326+
get_local $1
327+
i32.xor
328328
drop
329329
i32.const 1
330330
tee_local $0
@@ -363,13 +363,13 @@
363363
set_global $builtins/i
364364
i32.const -42
365365
tee_local $0
366-
i32.const 0
367-
get_local $0
368-
i32.sub
366+
i32.const 31
367+
i32.shr_s
368+
tee_local $1
369369
get_local $0
370-
i32.const 0
371-
i32.gt_s
372-
select
370+
i32.add
371+
get_local $1
372+
i32.xor
373373
set_global $builtins/i
374374
get_global $builtins/i
375375
i32.const 42
@@ -444,13 +444,13 @@
444444
drop
445445
i64.const -42
446446
tee_local $2
447-
i64.const 0
448-
get_local $2
449-
i64.sub
447+
i64.const 63
448+
i64.shr_s
449+
tee_local $3
450450
get_local $2
451-
i64.const 0
452-
i64.gt_s
453-
select
451+
i64.add
452+
get_local $3
453+
i64.xor
454454
drop
455455
i64.const 1
456456
i64.clz
@@ -471,13 +471,13 @@
471471
set_global $builtins/I
472472
i64.const -42
473473
tee_local $2
474-
i64.const 0
475-
get_local $2
476-
i64.sub
474+
i64.const 63
475+
i64.shr_s
476+
tee_local $3
477477
get_local $2
478-
i64.const 0
479-
i64.gt_s
480-
select
478+
i64.add
479+
get_local $3
480+
i64.xor
481481
set_global $builtins/I
482482
get_global $builtins/I
483483
i64.const 42

0 commit comments

Comments
 (0)