Skip to content

Commit 1b08b14

Browse files
author
Leon Clark
committed
Address review comments.
1 parent f9afbe9 commit 1b08b14

File tree

1 file changed

+90
-127
lines changed

1 file changed

+90
-127
lines changed

llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll

+90-127
Original file line numberDiff line numberDiff line change
@@ -2186,10 +2186,9 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) {
21862186
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i7:
21872187
; GFX9-GISEL: ; %bb.0:
21882188
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2189-
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
2190-
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2191-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 25, v0
2192-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2189+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2190+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2191+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
21932192
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 true)
21942193
ret i7 %ctlz
21952194
}
@@ -2276,19 +2275,18 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
22762275
; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i18:
22772276
; GFX9-GISEL: ; %bb.0:
22782277
; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
2279-
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
2280-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
2281-
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2282-
; GFX9-GISEL-NEXT: s_and_b32 s0, s4, 0x3ffff
2283-
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
2284-
; GFX9-GISEL-NEXT: s_sub_i32 s0, s0, 14
2285-
; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
2286-
; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
2287-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s0
2288-
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[2:3]
2289-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2290-
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[2:3] offset:2
2291-
; GFX9-GISEL-NEXT: s_endpgm
2278+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
2279+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
2280+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2281+
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 14
2282+
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
2283+
; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
2284+
; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
2285+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s0
2286+
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[2:3]
2287+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2288+
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[2:3] offset:2
2289+
; GFX9-GISEL-NEXT: s_endpgm
22922290
%ctlz = call i18 @llvm.ctlz.i18(i18 %val, i1 true) nounwind readnone
22932291
store i18 %ctlz, ptr addrspace(1) %out, align 4
22942292
ret void
@@ -2319,10 +2317,9 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) {
23192317
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i18:
23202318
; GFX9-GISEL: ; %bb.0:
23212319
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2322-
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ffff, v0
2323-
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2324-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 14, v0
2325-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2320+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2321+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2322+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23262323
%ctlz = call i18 @llvm.ctlz.i18(i18 %val, i1 true)
23272324
ret i18 %ctlz
23282325
}
@@ -2358,13 +2355,11 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) {
23582355
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i18:
23592356
; GFX9-GISEL: ; %bb.0:
23602357
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2361-
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ffff, v0
2362-
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0x3ffff, v1
2363-
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2364-
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2365-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 14, v0
2366-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 14, v1
2367-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2358+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2359+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 14, v1
2360+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2361+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2362+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23682363
%ctlz = call <2 x i18> @llvm.ctlz.v2i18(<2 x i18> %val, i1 true)
23692364
ret <2 x i18> %ctlz
23702365
}
@@ -2373,17 +2368,13 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
23732368
; SI-LABEL: v_ctlz_zero_undef_v2i16:
23742369
; SI: ; %bb.0:
23752370
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2376-
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2377-
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2378-
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2379-
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2380-
; SI-NEXT: v_add_i32_e32 v1, vcc, -16, v1
2381-
; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
2382-
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2383-
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2384-
; SI-NEXT: v_or_b32_e32 v0, v0, v2
2385-
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2386-
; SI-NEXT: s_setpc_b64 s[30:31]
2371+
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2372+
; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2373+
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2374+
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2375+
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2376+
; SI-NEXT: v_or_b32_e32 v0, v0, v2
2377+
; SI-NEXT: s_setpc_b64 s[30:31]
23872378
;
23882379
; VI-LABEL: v_ctlz_zero_undef_v2i16:
23892380
; VI: ; %bb.0:
@@ -2403,13 +2394,11 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
24032394
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i16:
24042395
; GFX9-GISEL: ; %bb.0:
24052396
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2406-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2407-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 16, v1
24082397
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2409-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 16, v0
2410-
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2411-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2412-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2398+
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2399+
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2400+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2401+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
24132402
%ctlz = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %val, i1 true)
24142403
ret <2 x i16> %ctlz
24152404
}
@@ -2418,22 +2407,17 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
24182407
; SI-LABEL: v_ctlz_zero_undef_v3i16:
24192408
; SI: ; %bb.0:
24202409
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2421-
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2422-
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2423-
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2424-
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2425-
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2426-
; SI-NEXT: v_ffbh_u32_e32 v2, v2
24272410
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2428-
; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
2429-
; SI-NEXT: v_add_i32_e32 v3, vcc, -16, v2
2430-
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2431-
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v3
2432-
; SI-NEXT: v_or_b32_e32 v0, v1, v0
2433-
; SI-NEXT: v_add_i32_e32 v0, vcc, 0xfff00000, v0
2434-
; SI-NEXT: v_or_b32_e32 v2, 0x100000, v2
2435-
; SI-NEXT: v_alignbit_b32 v1, v3, v0, 16
2436-
; SI-NEXT: s_setpc_b64 s[30:31]
2411+
; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2412+
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2413+
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2414+
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2415+
; SI-NEXT: v_ffbh_u32_e32 v3, v2
2416+
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2417+
; SI-NEXT: v_or_b32_e32 v0, v0, v1
2418+
; SI-NEXT: v_or_b32_e32 v2, 0x200000, v3
2419+
; SI-NEXT: v_alignbit_b32 v1, v3, v0, 16
2420+
; SI-NEXT: s_setpc_b64 s[30:31]
24372421
;
24382422
; VI-LABEL: v_ctlz_zero_undef_v3i16:
24392423
; VI: ; %bb.0:
@@ -2455,15 +2439,12 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
24552439
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v3i16:
24562440
; GFX9-GISEL: ; %bb.0:
24572441
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2458-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2459-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v2, 16, v2
24602442
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2461-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 16, v0
2462-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2463-
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
2464-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 16, v1
2465-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
2466-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2443+
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2444+
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2445+
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2446+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2447+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
24672448
%ctlz = call <3 x i16> @llvm.ctlz.v3i16(<3 x i16> %val, i1 true)
24682449
ret <3 x i16> %ctlz
24692450
}
@@ -2472,27 +2453,21 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
24722453
; SI-LABEL: v_ctlz_zero_undef_v4i16:
24732454
; SI: ; %bb.0:
24742455
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475-
; SI-NEXT: v_and_b32_e32 v3, 0xffff, v3
2476-
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2477-
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
2478-
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2479-
; SI-NEXT: v_ffbh_u32_e32 v3, v3
2480-
; SI-NEXT: v_ffbh_u32_e32 v2, v2
2481-
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2482-
; SI-NEXT: v_ffbh_u32_e32 v0, v0
24832456
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2484-
; SI-NEXT: v_add_i32_e32 v2, vcc, -16, v2
2485-
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2486-
; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
2487-
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2488-
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2489-
; SI-NEXT: v_or_b32_e32 v2, v3, v2
2490-
; SI-NEXT: v_or_b32_e32 v0, v1, v0
2491-
; SI-NEXT: v_add_i32_e32 v2, vcc, 0xfff00000, v2
2492-
; SI-NEXT: v_add_i32_e32 v0, vcc, 0xfff00000, v0
2493-
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
2494-
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2495-
; SI-NEXT: s_setpc_b64 s[30:31]
2457+
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2458+
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2459+
; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2460+
; SI-NEXT: v_ffbh_u32_e32 v3, v3
2461+
; SI-NEXT: v_ffbh_u32_e32 v2, v2
2462+
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2463+
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2464+
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2465+
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2466+
; SI-NEXT: v_or_b32_e32 v2, v2, v3
2467+
; SI-NEXT: v_or_b32_e32 v0, v0, v1
2468+
; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16
2469+
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2470+
; SI-NEXT: s_setpc_b64 s[30:31]
24962471
;
24972472
; VI-LABEL: v_ctlz_zero_undef_v4i16:
24982473
; VI: ; %bb.0:
@@ -2517,19 +2492,14 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
25172492
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i16:
25182493
; GFX9-GISEL: ; %bb.0:
25192494
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2521-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v2, 16, v2
25222495
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2523-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
2524-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 16, v0
2525-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v3, 16, v3
2526-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2527-
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
2528-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 16, v1
2529-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
2530-
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
2531-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v2
2532-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2496+
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2497+
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2498+
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2499+
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2500+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2501+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
2502+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
25332503
%ctlz = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %val, i1 true)
25342504
ret <4 x i16> %ctlz
25352505
}
@@ -2538,28 +2508,25 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
25382508
; SI-LABEL: v_ctlz_zero_undef_v2i8:
25392509
; SI: ; %bb.0:
25402510
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2541-
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
2542-
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
2511+
; SI-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2512+
; SI-NEXT: v_lshlrev_b32_e32 v0, 24, v0
25432513
; SI-NEXT: v_ffbh_u32_e32 v1, v1
2514+
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v1
25442515
; SI-NEXT: v_ffbh_u32_e32 v0, v0
2545-
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
2546-
; SI-NEXT: v_subrev_i32_e32 v0, vcc, 24, v0
2547-
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
2548-
; SI-NEXT: v_or_b32_e32 v0, v1, v0
2549-
; SI-NEXT: v_add_i32_e32 v0, vcc, 0xffffe800, v0
2550-
; SI-NEXT: v_bfe_u32 v1, v0, 8, 8
2516+
; SI-NEXT: v_or_b32_e32 v0, v0, v2
25512517
; SI-NEXT: s_setpc_b64 s[30:31]
25522518
;
25532519
; VI-LABEL: v_ctlz_zero_undef_v2i8:
25542520
; VI: ; %bb.0:
25552521
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2556-
; VI-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0
2557-
; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
2558-
; VI-NEXT: v_add_u16_e32 v1, 0xe800, v1
2559-
; VI-NEXT: v_subrev_u16_e32 v0, 24, v0
2560-
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2561-
; VI-NEXT: v_lshrrev_b16_e32 v1, 8, v1
2562-
; VI-NEXT: s_setpc_b64 s[30:31]
2522+
; VI-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2523+
; VI-NEXT: v_ffbh_u32_e32 v1, v1
2524+
; VI-NEXT: v_lshlrev_b32_e32 v0, 24, v0
2525+
; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v1
2526+
; VI-NEXT: v_ffbh_u32_e32 v0, v0
2527+
; VI-NEXT: v_or_b32_e32 v0, v0, v2
2528+
; VI-NEXT: v_and_b32_e32 v1, 0xff, v1
2529+
; VI-NEXT: s_setpc_b64 s[30:31]
25632530
;
25642531
; EG-LABEL: v_ctlz_zero_undef_v2i8:
25652532
; EG: ; %bb.0:
@@ -2569,11 +2536,9 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
25692536
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i8:
25702537
; GFX9-GISEL: ; %bb.0:
25712538
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2572-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
2573-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
2574-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 24, v0
2575-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 24, v1
2576-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2539+
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2540+
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2541+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
25772542
%ctlz = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %val, i1 true)
25782543
ret <2 x i8> %ctlz
25792544
}
@@ -2614,13 +2579,11 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) {
26142579
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i7:
26152580
; GFX9-GISEL: ; %bb.0:
26162581
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2617-
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
2618-
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0x7f, v1
2619-
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2620-
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2621-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 25, v0
2622-
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 25, v1
2623-
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2582+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2583+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 25, v1
2584+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2585+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2586+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
26242587
%ctlz = call <2 x i7> @llvm.ctlz.v2i7(<2 x i7> %val, i1 true)
26252588
ret <2 x i7> %ctlz
26262589
}

0 commit comments

Comments
 (0)