@@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
282
282
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
283
283
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
284
284
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
285
- ; GFX9-O0-NEXT: v_mov_b32_e32 v1 , v16
285
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v5 , v16
286
286
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
287
- ; GFX9-O0-NEXT: v_mov_b32_e32 v5 , v14
287
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v1 , v14
288
288
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
289
- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
290
- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291
289
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
290
+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
291
+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
292
292
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
293
293
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
294
- ; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
295
- ; GFX9-O0-NEXT: v_mov_b32_e32 v14 , v5
294
+ ; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
295
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10 , v5
296
296
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
297
297
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
298
- ; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
299
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10 , v1
298
+ ; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
299
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v14 , v1
300
300
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
301
301
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
302
302
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
@@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
312
312
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
313
313
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
314
314
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
315
- ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
316
- ; GFX9-O0-NEXT: v_mov_b32_e32 v8 , v11
315
+ ; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
316
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v7 , v11
317
317
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
318
318
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
319
- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7 , vcc, v7 , v5, vcc
320
- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8 , v3, vcc
321
- ; GFX9-O0-NEXT: v_subb_co_u32_e32 v2 , vcc, v2, v5, vcc
319
+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v8 , vcc, v8 , v5, vcc
320
+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7 , v3, vcc
321
+ ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7 , vcc, v2, v5, vcc
322
322
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
323
323
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
324
- ; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
325
- ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
324
+ ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
325
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
326
326
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
327
327
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
328
- ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
329
- ; GFX9-O0-NEXT: v_mov_b32_e32 v2 , v7
328
+ ; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
329
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v12 , v7
330
330
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
331
331
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
332
332
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
@@ -339,18 +339,26 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
339
339
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
340
340
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
341
341
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
342
- ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
342
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
343
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
344
+ ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
343
345
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
344
- ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
345
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
346
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
347
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
348
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
349
+ ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
346
350
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
347
- ; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
348
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
351
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
352
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
353
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
354
+ ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
349
355
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
350
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
351
- ; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
356
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
357
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
358
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
359
+ ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
352
360
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
353
- ; GFX9-O0-NEXT: buffer_store_dword v14 , off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
361
+ ; GFX9-O0-NEXT: buffer_store_dword v4 , off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
354
362
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
355
363
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
356
364
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
@@ -403,7 +411,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
403
411
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
404
412
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
405
413
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
406
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
414
+ ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
415
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
407
416
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
408
417
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
409
418
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
@@ -439,7 +448,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
439
448
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
440
449
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
441
450
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
442
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
451
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
452
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
443
453
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
444
454
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
445
455
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
@@ -690,10 +700,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
690
700
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
691
701
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
692
702
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
693
- ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
694
- ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
695
- ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
696
- ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
703
+ ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
704
+ ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
705
+ ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
706
+ ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
697
707
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
698
708
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
699
709
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
@@ -903,14 +913,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
903
913
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
904
914
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
905
915
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
906
- ; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
907
- ; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
908
- ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
909
- ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
910
- ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
911
- ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
912
- ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
913
- ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
916
+ ; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
917
+ ; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
918
+ ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
919
+ ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
920
+ ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
921
+ ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
922
+ ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
923
+ ; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
914
924
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
915
925
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
916
926
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -1028,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1028
1038
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
1029
1039
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
1030
1040
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
1031
- ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1032
- ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1033
- ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1034
- ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1041
+ ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1042
+ ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1043
+ ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1044
+ ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1035
1045
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1036
1046
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1037
1047
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
0 commit comments