Skip to content

Commit 2175e6f

Browse files
MaxGraey authored and dcodeIO committed
Optimize some std mem methods by replacing expensive rem operations to fast bit logic (#24)
1 parent 74f8189 commit 2175e6f

File tree

8 files changed

+193
-172
lines changed

8 files changed

+193
-172
lines changed

NOTICE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ The following authors have all licensed their contributions to AssemblyScript
44
under the licensing terms detailed in LICENSE.
55

66
* Daniel Wirtz <[email protected]>
7+
* Max Graey <[email protected]>
78

89
================================================================================
910

std/assembly/memory.ts

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ function copy_memory(dest: usize, src: usize, n: usize): void {
55
var w: u32, x: u32;
66

77
// copy 1 byte each until src is aligned to 4 bytes
8-
while (n && src % 4) {
8+
while (n && (src & 3)) {
99
store<u8>(dest++, load<u8>(src++));
1010
n--;
1111
}
1212

1313
// if dst is aligned to 4 bytes as well, copy 4 bytes each
14-
if (dest % 4 == 0) {
14+
if ((dest & 3) == 0) {
1515
while (n >= 16) {
1616
store<u32>(dest , load<u32>(src ));
1717
store<u32>(dest + 4, load<u32>(src + 4));
@@ -41,7 +41,7 @@ function copy_memory(dest: usize, src: usize, n: usize): void {
4141
// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
4242
// doing shifts is faster when copying enough bytes (here: 32 or more)
4343
if (n >= 32) {
44-
switch (dest % 4) {
44+
switch (dest & 3) {
4545
// known to be != 0
4646
case 1:
4747
w = load<u32>(src);
@@ -152,27 +152,27 @@ export function move_memory(dest: usize, src: usize, n: usize): void {
152152
return;
153153
}
154154
if (dest < src) {
155-
if (src % 8 == dest % 8) {
156-
while (dest % 8) {
155+
if ((src & 7) == (dest & 7)) {
156+
while (dest & 7) {
157157
if (!n)
158158
return;
159159
--n;
160160
store<u8>(dest++, load<u8>(src++));
161161
}
162162
while (n >= 8) {
163163
store<u64>(dest, load<u64>(src));
164-
n -= 8;
164+
n -= 8;
165165
dest += 8;
166-
src += 8;
166+
src += 8;
167167
}
168168
}
169169
while (n) {
170170
store<u8>(dest++, load<u8>(src++));
171171
--n;
172172
}
173173
} else {
174-
if (src % 8 == dest % 8) {
175-
while ((dest + n) % 8) {
174+
if ((src & 7) == (dest & 7)) {
175+
while ((dest + n) & 7) {
176176
if (!n)
177177
return;
178178
store<u8>(dest + --n, load<u8>(src + n));
@@ -217,7 +217,7 @@ export function set_memory(dest: usize, c: u8, n: usize): void {
217217
n -= k;
218218
n &= -4;
219219

220-
var c32: u32 = -1 / 255 * c;
220+
var c32: u32 = (-1 / 255) * c;
221221

222222
// fill head/tail up to 28 bytes each in preparation
223223
store<u32>(dest, c32);

tests/compiler/std/allocator_arena.optimized.wast

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -435,13 +435,13 @@
435435
(local $4 i32)
436436
(loop $continue|0
437437
(if
438-
(if (result i32)
439-
(get_local $2)
440-
(i32.rem_u
438+
(select
439+
(i32.and
441440
(get_local $1)
442-
(i32.const 4)
441+
(i32.const 3)
443442
)
444443
(get_local $2)
444+
(get_local $2)
445445
)
446446
(block
447447
(set_local $0
@@ -480,9 +480,9 @@
480480
)
481481
(if
482482
(i32.eqz
483-
(i32.rem_u
483+
(i32.and
484484
(get_local $0)
485-
(i32.const 4)
485+
(i32.const 3)
486486
)
487487
)
488488
(block
@@ -684,9 +684,9 @@
684684
(block $tablify|0
685685
(br_table $case0|2 $case1|2 $case2|2 $tablify|0
686686
(i32.sub
687-
(i32.rem_u
687+
(i32.and
688688
(get_local $0)
689-
(i32.const 4)
689+
(i32.const 3)
690690
)
691691
(i32.const 1)
692692
)
@@ -2055,21 +2055,21 @@
20552055
(block
20562056
(if
20572057
(i32.eq
2058-
(i32.rem_u
2058+
(i32.and
20592059
(get_local $1)
2060-
(i32.const 8)
2060+
(i32.const 7)
20612061
)
2062-
(i32.rem_u
2062+
(i32.and
20632063
(get_local $0)
2064-
(i32.const 8)
2064+
(i32.const 7)
20652065
)
20662066
)
20672067
(block
20682068
(loop $continue|0
20692069
(if
2070-
(i32.rem_u
2070+
(i32.and
20712071
(get_local $0)
2072-
(i32.const 8)
2072+
(i32.const 7)
20732073
)
20742074
(block
20752075
(if
@@ -2191,24 +2191,24 @@
21912191
(block
21922192
(if
21932193
(i32.eq
2194-
(i32.rem_u
2194+
(i32.and
21952195
(get_local $1)
2196-
(i32.const 8)
2196+
(i32.const 7)
21972197
)
2198-
(i32.rem_u
2198+
(i32.and
21992199
(get_local $0)
2200-
(i32.const 8)
2200+
(i32.const 7)
22012201
)
22022202
)
22032203
(block
22042204
(loop $continue|3
22052205
(if
2206-
(i32.rem_u
2206+
(i32.and
22072207
(i32.add
22082208
(get_local $0)
22092209
(get_local $2)
22102210
)
2211-
(i32.const 8)
2211+
(i32.const 7)
22122212
)
22132213
(block
22142214
(if

tests/compiler/std/allocator_arena.wast

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -348,15 +348,16 @@
348348
;;@ (lib)/memory.ts:220:17
349349
(i32.mul
350350
(i32.div_u
351+
;;@ (lib)/memory.ts:220:18
351352
(i32.sub
352353
(i32.const 0)
353-
;;@ (lib)/memory.ts:220:18
354+
;;@ (lib)/memory.ts:220:19
354355
(i32.const 1)
355356
)
356-
;;@ (lib)/memory.ts:220:22
357+
;;@ (lib)/memory.ts:220:23
357358
(i32.const 255)
358359
)
359-
;;@ (lib)/memory.ts:220:28
360+
;;@ (lib)/memory.ts:220:30
360361
(get_local $1)
361362
)
362363
)
@@ -700,10 +701,11 @@
700701
(i32.const 0)
701702
)
702703
;;@ (lib)/memory.ts:8:14
703-
(i32.rem_u
704+
(i32.and
705+
;;@ (lib)/memory.ts:8:15
704706
(get_local $1)
705-
;;@ (lib)/memory.ts:8:20
706-
(i32.const 4)
707+
;;@ (lib)/memory.ts:8:21
708+
(i32.const 3)
707709
)
708710
(get_local $2)
709711
)
@@ -758,15 +760,16 @@
758760
(if
759761
;;@ (lib)/memory.ts:14:6
760762
(i32.eq
761-
(i32.rem_u
763+
(i32.and
764+
;;@ (lib)/memory.ts:14:7
762765
(get_local $0)
763-
;;@ (lib)/memory.ts:14:13
764-
(i32.const 4)
766+
;;@ (lib)/memory.ts:14:14
767+
(i32.const 3)
765768
)
766-
;;@ (lib)/memory.ts:14:18
769+
;;@ (lib)/memory.ts:14:20
767770
(i32.const 0)
768771
)
769-
;;@ (lib)/memory.ts:14:21
772+
;;@ (lib)/memory.ts:14:23
770773
(block
771774
(block $break|1
772775
(loop $continue|1
@@ -1065,10 +1068,10 @@
10651068
(block $case0|2
10661069
(set_local $5
10671070
;;@ (lib)/memory.ts:44:12
1068-
(i32.rem_u
1071+
(i32.and
10691072
(get_local $0)
10701073
;;@ (lib)/memory.ts:44:19
1071-
(i32.const 4)
1074+
(i32.const 3)
10721075
)
10731076
)
10741077
(br_if $case0|2
@@ -3003,28 +3006,30 @@
30033006
(if
30043007
;;@ (lib)/memory.ts:155:8
30053008
(i32.eq
3006-
(i32.rem_u
3009+
(i32.and
3010+
;;@ (lib)/memory.ts:155:9
30073011
(get_local $1)
3008-
;;@ (lib)/memory.ts:155:14
3009-
(i32.const 8)
3012+
;;@ (lib)/memory.ts:155:15
3013+
(i32.const 7)
30103014
)
3011-
;;@ (lib)/memory.ts:155:19
3012-
(i32.rem_u
3015+
;;@ (lib)/memory.ts:155:21
3016+
(i32.and
3017+
;;@ (lib)/memory.ts:155:22
30133018
(get_local $0)
3014-
;;@ (lib)/memory.ts:155:26
3015-
(i32.const 8)
3019+
;;@ (lib)/memory.ts:155:29
3020+
(i32.const 7)
30163021
)
30173022
)
3018-
;;@ (lib)/memory.ts:155:29
3023+
;;@ (lib)/memory.ts:155:33
30193024
(block
30203025
(block $break|0
30213026
(loop $continue|0
30223027
(if
30233028
;;@ (lib)/memory.ts:156:13
3024-
(i32.rem_u
3029+
(i32.and
30253030
(get_local $0)
30263031
;;@ (lib)/memory.ts:156:20
3027-
(i32.const 8)
3032+
(i32.const 7)
30283033
)
30293034
(block
30303035
(block
@@ -3110,7 +3115,7 @@
31103115
(set_local $2
31113116
(i32.sub
31123117
(get_local $2)
3113-
;;@ (lib)/memory.ts:164:13
3118+
;;@ (lib)/memory.ts:164:16
31143119
(i32.const 8)
31153120
)
31163121
)
@@ -3126,7 +3131,7 @@
31263131
(set_local $1
31273132
(i32.add
31283133
(get_local $1)
3129-
;;@ (lib)/memory.ts:166:15
3134+
;;@ (lib)/memory.ts:166:16
31303135
(i32.const 8)
31313136
)
31323137
)
@@ -3199,33 +3204,35 @@
31993204
(if
32003205
;;@ (lib)/memory.ts:174:8
32013206
(i32.eq
3202-
(i32.rem_u
3207+
(i32.and
3208+
;;@ (lib)/memory.ts:174:9
32033209
(get_local $1)
3204-
;;@ (lib)/memory.ts:174:14
3205-
(i32.const 8)
3210+
;;@ (lib)/memory.ts:174:15
3211+
(i32.const 7)
32063212
)
3207-
;;@ (lib)/memory.ts:174:19
3208-
(i32.rem_u
3213+
;;@ (lib)/memory.ts:174:21
3214+
(i32.and
3215+
;;@ (lib)/memory.ts:174:22
32093216
(get_local $0)
3210-
;;@ (lib)/memory.ts:174:26
3211-
(i32.const 8)
3217+
;;@ (lib)/memory.ts:174:29
3218+
(i32.const 7)
32123219
)
32133220
)
3214-
;;@ (lib)/memory.ts:174:29
3221+
;;@ (lib)/memory.ts:174:33
32153222
(block
32163223
(block $break|3
32173224
(loop $continue|3
32183225
(if
32193226
;;@ (lib)/memory.ts:175:13
3220-
(i32.rem_u
3227+
(i32.and
32213228
(i32.add
32223229
;;@ (lib)/memory.ts:175:14
32233230
(get_local $0)
32243231
;;@ (lib)/memory.ts:175:21
32253232
(get_local $2)
32263233
)
32273234
;;@ (lib)/memory.ts:175:26
3228-
(i32.const 8)
3235+
(i32.const 7)
32293236
)
32303237
(block
32313238
(block

0 commit comments

Comments
 (0)