@@ -2186,10 +2186,9 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) {
; Widened ctlz_zero_undef: the removed sequence masked the operand with 0x7f, so
; the i7 value occupies the low 7 bits of v0. Shifting it LEFT by 25 puts its top
; bit at bit 31, letting a 32-bit v_ffbh return the i7 count directly; a right
; shift by 25 would discard the operand and count unrelated upper bits instead.
2186
2186
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i7:
2187
2187
; GFX9-GISEL: ; %bb.0:
2188
2188
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2189
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
2190
- ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2191
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 25, v0
2192
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2189
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
2190
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2191
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2193
2192
%ctlz = call i7 @llvm.ctlz.i7 (i7 %val , i1 true )
2194
2193
ret i7 %ctlz
2195
2194
}
@@ -2276,19 +2275,18 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
; Widened ctlz_zero_undef: the removed sequence masked the loaded value with
; 0x3ffff, so the i18 operand is the low 18 bits of s4. It has to be shifted LEFT
; by 14 so its top bit lands on bit 31 before s_flbit; a right shift by 14 would
; drop the low 14 operand bits and count from unrelated upper bits. The later
; s_lshr_b32 by 16 only splits the result for the short+byte store and is unchanged.
2276
2275
; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i18:
2277
2276
; GFX9-GISEL: ; %bb.0:
2278
2277
; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
2279
- ; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
2280
- ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
2281
- ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2282
- ; GFX9-GISEL-NEXT: s_and_b32 s0, s4, 0x3ffff
2283
- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
2284
- ; GFX9-GISEL-NEXT: s_sub_i32 s0, s0, 14
2285
- ; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
2286
- ; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
2287
- ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s0
2288
- ; GFX9-GISEL-NEXT: global_store_short v0, v1, s[2:3]
2289
- ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2290
- ; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[2:3] offset:2
2291
- ; GFX9-GISEL-NEXT: s_endpgm
2278
+ ; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
2279
+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
2280
+ ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2281
+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 14
2282
+ ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
2283
+ ; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
2284
+ ; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
2285
+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s0
2286
+ ; GFX9-GISEL-NEXT: global_store_short v0, v1, s[2:3]
2287
+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2288
+ ; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[2:3] offset:2
2289
+ ; GFX9-GISEL-NEXT: s_endpgm
2292
2290
%ctlz = call i18 @llvm.ctlz.i18 (i18 %val , i1 true ) nounwind readnone
2293
2291
store i18 %ctlz , ptr addrspace (1 ) %out , align 4
2294
2292
ret void
@@ -2319,10 +2317,9 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) {
; Widened ctlz_zero_undef: the removed sequence masked the operand with 0x3ffff,
; so the i18 value occupies the low 18 bits of v0. Shifting it LEFT by 14 aligns
; its top bit with bit 31 so the 32-bit v_ffbh result is already the i18 count;
; a right shift by 14 would throw away the low operand bits instead.
2319
2317
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i18:
2320
2318
; GFX9-GISEL: ; %bb.0:
2321
2319
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2322
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ffff, v0
2323
- ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2324
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 14, v0
2325
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2320
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
2321
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2322
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2326
2323
%ctlz = call i18 @llvm.ctlz.i18 (i18 %val , i1 true )
2327
2324
ret i18 %ctlz
2328
2325
}
@@ -2358,13 +2355,11 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) {
; Widened ctlz_zero_undef, one lane per VGPR: the removed sequence masked each
; lane with 0x3ffff, so each i18 value is in the low 18 bits of v0 / v1. Each lane
; is shifted LEFT by 14 so its top bit sits at bit 31 before the 32-bit v_ffbh;
; a right shift by 14 would discard the low operand bits of both lanes.
2358
2355
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i18:
2359
2356
; GFX9-GISEL: ; %bb.0:
2360
2357
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2361
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ffff, v0
2362
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0x3ffff, v1
2363
- ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2364
- ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2365
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 14, v0
2366
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 14, v1
2367
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2358
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
2359
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 14, v1
2360
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2361
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2362
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2368
2363
%ctlz = call <2 x i18 > @llvm.ctlz.v2i18 (<2 x i18 > %val , i1 true )
2369
2364
ret <2 x i18 > %ctlz
2370
2365
}
@@ -2373,17 +2368,13 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
2373
2368
; SI-LABEL: v_ctlz_zero_undef_v2i16:
2374
2369
; SI: ; %bb.0:
2375
2370
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2376
- ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2377
- ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2378
- ; SI-NEXT: v_ffbh_u32_e32 v1, v1
2379
- ; SI-NEXT: v_ffbh_u32_e32 v0, v0
2380
- ; SI-NEXT: v_add_i32_e32 v1, vcc, -16, v1
2381
- ; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
2382
- ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2383
- ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2384
- ; SI-NEXT: v_or_b32_e32 v0, v0, v2
2385
- ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2386
- ; SI-NEXT: s_setpc_b64 s[30:31]
2371
+ ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2372
+ ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2373
+ ; SI-NEXT: v_ffbh_u32_e32 v1, v1
2374
+ ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2375
+ ; SI-NEXT: v_ffbh_u32_e32 v0, v0
2376
+ ; SI-NEXT: v_or_b32_e32 v0, v0, v2
2377
+ ; SI-NEXT: s_setpc_b64 s[30:31]
2387
2378
;
2388
2379
; VI-LABEL: v_ctlz_zero_undef_v2i16:
2389
2380
; VI: ; %bb.0:
@@ -2403,13 +2394,11 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
; NOTE(review): the new GFX9-GISEL sequence below does not look equivalent to the
; removed one. The removed lines ran v_ffbh on both WORD_0 and WORD_1 of v0 and
; subtracted 16 from each count; the added lines never read WORD_0, keep the
; WORD_1 count (without the -16 adjustment) as the low half, and fill the high
; half from s_flbit_i32_b32 of the constant 0. Presumably a codegen regression
; rather than an intended improvement -- confirm before accepting these checks.
2403
2394
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i16:
2404
2395
; GFX9-GISEL: ; %bb.0:
2405
2396
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2406
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2407
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 16, v1
2408
2397
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2409
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 16, v0
2410
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v1 , 0xffff, v1
2411
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0 , 16, v1
2412
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2398
+ ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2399
+ ; GFX9-GISEL-NEXT: v_and_b32_e32 v0 , 0xffff, v0
2400
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2401
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2413
2402
%ctlz = call <2 x i16 > @llvm.ctlz.v2i16 (<2 x i16 > %val , i1 true )
2414
2403
ret <2 x i16 > %ctlz
2415
2404
}
@@ -2418,22 +2407,17 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
2418
2407
; SI-LABEL: v_ctlz_zero_undef_v3i16:
2419
2408
; SI: ; %bb.0:
2420
2409
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2421
- ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2422
- ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2423
- ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2424
- ; SI-NEXT: v_ffbh_u32_e32 v1, v1
2425
- ; SI-NEXT: v_ffbh_u32_e32 v0, v0
2426
- ; SI-NEXT: v_ffbh_u32_e32 v2, v2
2427
2410
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2428
- ; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
2429
- ; SI-NEXT: v_add_i32_e32 v3, vcc, -16, v2
2430
- ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2431
- ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v3
2432
- ; SI-NEXT: v_or_b32_e32 v0, v1, v0
2433
- ; SI-NEXT: v_add_i32_e32 v0, vcc, 0xfff00000, v0
2434
- ; SI-NEXT: v_or_b32_e32 v2, 0x100000, v2
2435
- ; SI-NEXT: v_alignbit_b32 v1, v3, v0, 16
2436
- ; SI-NEXT: s_setpc_b64 s[30:31]
2411
+ ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2412
+ ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2413
+ ; SI-NEXT: v_ffbh_u32_e32 v1, v1
2414
+ ; SI-NEXT: v_ffbh_u32_e32 v0, v0
2415
+ ; SI-NEXT: v_ffbh_u32_e32 v3, v2
2416
+ ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2417
+ ; SI-NEXT: v_or_b32_e32 v0, v0, v1
2418
+ ; SI-NEXT: v_or_b32_e32 v2, 0x200000, v3
2419
+ ; SI-NEXT: v_alignbit_b32 v1, v3, v0, 16
2420
+ ; SI-NEXT: s_setpc_b64 s[30:31]
2437
2421
;
2438
2422
; VI-LABEL: v_ctlz_zero_undef_v3i16:
2439
2423
; VI: ; %bb.0:
@@ -2455,15 +2439,12 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
; NOTE(review): the new GFX9-GISEL sequence below does not look equivalent to the
; removed one. The removed lines counted WORD_0 and WORD_1 of v0 and WORD_0 of v1,
; subtracting 16 from each; the added lines never read WORD_0 of either register,
; count WORD_1 of v1 instead, drop every -16 adjustment, and build the high half
; of v0 from s_flbit_i32_b32 of the constant 0. Presumably a codegen regression
; rather than an intended improvement -- confirm before accepting these checks.
2455
2439
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v3i16:
2456
2440
; GFX9-GISEL: ; %bb.0:
2457
2441
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2458
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2459
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v2, 16, v2
2460
2442
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2461
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 16, v0
2462
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2463
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
2464
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 16, v1
2465
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
2466
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2443
+ ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2444
+ ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2445
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2446
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2447
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2467
2448
%ctlz = call <3 x i16 > @llvm.ctlz.v3i16 (<3 x i16 > %val , i1 true )
2468
2449
ret <3 x i16 > %ctlz
2469
2450
}
@@ -2472,27 +2453,21 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
2472
2453
; SI-LABEL: v_ctlz_zero_undef_v4i16:
2473
2454
; SI: ; %bb.0:
2474
2455
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475
- ; SI-NEXT: v_and_b32_e32 v3, 0xffff, v3
2476
- ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2477
- ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2478
- ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2479
- ; SI-NEXT: v_ffbh_u32_e32 v3, v3
2480
- ; SI-NEXT: v_ffbh_u32_e32 v2, v2
2481
- ; SI-NEXT: v_ffbh_u32_e32 v1, v1
2482
- ; SI-NEXT: v_ffbh_u32_e32 v0, v0
2483
2456
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2484
- ; SI-NEXT: v_add_i32_e32 v2, vcc, -16, v2
2485
- ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2486
- ; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
2487
- ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2488
- ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2489
- ; SI-NEXT: v_or_b32_e32 v2, v3, v2
2490
- ; SI-NEXT: v_or_b32_e32 v0, v1, v0
2491
- ; SI-NEXT: v_add_i32_e32 v2, vcc, 0xfff00000, v2
2492
- ; SI-NEXT: v_add_i32_e32 v0, vcc, 0xfff00000, v0
2493
- ; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
2494
- ; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2495
- ; SI-NEXT: s_setpc_b64 s[30:31]
2457
+ ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2458
+ ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2459
+ ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2460
+ ; SI-NEXT: v_ffbh_u32_e32 v3, v3
2461
+ ; SI-NEXT: v_ffbh_u32_e32 v2, v2
2462
+ ; SI-NEXT: v_ffbh_u32_e32 v1, v1
2463
+ ; SI-NEXT: v_ffbh_u32_e32 v0, v0
2464
+ ; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2465
+ ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2466
+ ; SI-NEXT: v_or_b32_e32 v2, v2, v3
2467
+ ; SI-NEXT: v_or_b32_e32 v0, v0, v1
2468
+ ; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
2469
+ ; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2470
+ ; SI-NEXT: s_setpc_b64 s[30:31]
2496
2471
;
2497
2472
; VI-LABEL: v_ctlz_zero_undef_v4i16:
2498
2473
; VI: ; %bb.0:
@@ -2517,19 +2492,14 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
; NOTE(review): the new GFX9-GISEL sequence below does not look equivalent to the
; removed one. The removed lines counted WORD_0 and WORD_1 of both v0 and v1 and
; subtracted 16 from each count; the added lines never read WORD_0 of either
; register, keep the unadjusted WORD_1 counts as the low halves, and fill both
; high halves from s_flbit_i32_b32 of the constant 0. Presumably a codegen
; regression rather than an intended improvement -- confirm before accepting.
2517
2492
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i16:
2518
2493
; GFX9-GISEL: ; %bb.0:
2519
2494
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2521
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v2, 16, v2
2522
2495
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2523
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2524
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 16, v0
2525
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v3, 16, v3
2526
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2527
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
2528
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 16, v1
2529
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
2530
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
2531
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v2
2532
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2496
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2497
+ ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2498
+ ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2499
+ ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2500
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2501
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
2502
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2533
2503
%ctlz = call <4 x i16 > @llvm.ctlz.v4i16 (<4 x i16 > %val , i1 true )
2534
2504
ret <4 x i16 > %ctlz
2535
2505
}
@@ -2538,28 +2508,25 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
2538
2508
; SI-LABEL: v_ctlz_zero_undef_v2i8:
2539
2509
; SI: ; %bb.0:
2540
2510
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2541
- ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
2542
- ; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
2511
+ ; SI-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2512
+ ; SI-NEXT: v_lshlrev_b32_e32 v0, 24, v0
2543
2513
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2514
+ ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v1
2544
2515
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2545
- ; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
2546
- ; SI-NEXT: v_subrev_i32_e32 v0, vcc, 24, v0
2547
- ; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
2548
- ; SI-NEXT: v_or_b32_e32 v0, v1, v0
2549
- ; SI-NEXT: v_add_i32_e32 v0, vcc, 0xffffe800, v0
2550
- ; SI-NEXT: v_bfe_u32 v1, v0, 8, 8
2516
+ ; SI-NEXT: v_or_b32_e32 v0, v0, v2
2551
2517
; SI-NEXT: s_setpc_b64 s[30:31]
2552
2518
;
2553
2519
; VI-LABEL: v_ctlz_zero_undef_v2i8:
2554
2520
; VI: ; %bb.0:
2555
2521
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2556
- ; VI-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0
2557
- ; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
2558
- ; VI-NEXT: v_add_u16_e32 v1, 0xe800, v1
2559
- ; VI-NEXT: v_subrev_u16_e32 v0, 24, v0
2560
- ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2561
- ; VI-NEXT: v_lshrrev_b16_e32 v1, 8, v1
2562
- ; VI-NEXT: s_setpc_b64 s[30:31]
2522
+ ; VI-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2523
+ ; VI-NEXT: v_ffbh_u32_e32 v1, v1
2524
+ ; VI-NEXT: v_lshlrev_b32_e32 v0, 24, v0
2525
+ ; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v1
2526
+ ; VI-NEXT: v_ffbh_u32_e32 v0, v0
2527
+ ; VI-NEXT: v_or_b32_e32 v0, v0, v2
2528
+ ; VI-NEXT: v_and_b32_e32 v1, 0xff, v1
2529
+ ; VI-NEXT: s_setpc_b64 s[30:31]
2563
2530
;
2564
2531
; EG-LABEL: v_ctlz_zero_undef_v2i8:
2565
2532
; EG: ; %bb.0:
@@ -2569,11 +2536,9 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
; NOTE(review): the new GFX9-GISEL sequence below does not look equivalent to the
; removed one. The removed lines counted BYTE_0 of v0 / v1 and subtracted 24; the
; added lines count BYTE_3 instead, a byte the removed sequence never read, and
; apply no adjustment. Presumably a codegen regression rather than an intended
; improvement -- confirm before accepting these regenerated checks.
2569
2536
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i8:
2570
2537
; GFX9-GISEL: ; %bb.0:
2571
2538
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2572
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
2573
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
2574
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 24, v0
2575
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 24, v1
2576
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2539
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2540
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2541
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2577
2542
%ctlz = call <2 x i8 > @llvm.ctlz.v2i8 (<2 x i8 > %val , i1 true )
2578
2543
ret <2 x i8 > %ctlz
2579
2544
}
@@ -2614,13 +2579,11 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) {
; Widened ctlz_zero_undef, one lane per VGPR: the removed sequence masked each
; lane with 0x7f, so each i7 value is in the low 7 bits of v0 / v1. Each lane is
; shifted LEFT by 25 so its top bit sits at bit 31 before the 32-bit v_ffbh; a
; right shift by 25 would discard the operand bits of both lanes entirely.
2614
2579
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i7:
2615
2580
; GFX9-GISEL: ; %bb.0:
2616
2581
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2617
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
2618
- ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0x7f, v1
2619
- ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2620
- ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2621
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 25, v0
2622
- ; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 25, v1
2623
- ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2582
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
2583
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 25, v1
2584
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2585
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2586
+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2624
2587
%ctlz = call <2 x i7 > @llvm.ctlz.v2i7 (<2 x i7 > %val , i1 true )
2625
2588
ret <2 x i7 > %ctlz
2626
2589
}
0 commit comments