From 3f957cc67cff4e337f56fa2dbfdb037d3a997baf Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 21 Mar 2025 14:03:40 -0700 Subject: [PATCH 001/154] Bump version to 20.1.2 (#132293) --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 05e772cb6fa38..49cdc04707eb7 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 1) + set(LLVM_VERSION_PATCH 2) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From 9710e9963455563444c5834f0c9a1e77a21447da Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Tue, 25 Mar 2025 13:26:13 +0100 Subject: [PATCH 002/154] [X86][AVX10.2] Include changes for COMX and VGETEXP from rev. 2 (#132824) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address missing changes: - V[,U]COMXSD need to have XD (F3.0F –> F2.0F) - V[,U]COMXS[S,H] need to have XS (F2.[0F,MAP5] -> F3.[0F,MAP5]) - VGETEXPBF16 needs to have T_MAP6 and NP (66.MAP5 -> NP.MAP6) Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 (cherry picked from commit 975c208556ef85b321a223fe592fa6d98fadfaa0) --- llvm/lib/Target/X86/X86InstrAVX10.td | 20 ++-- .../CodeGen/X86/avx10_2_512bf16-intrinsics.ll | 4 +- .../CodeGen/X86/avx10_2bf16-intrinsics.ll | 20 ++-- .../MC/Disassembler/X86/avx10.2-bf16-32.txt | 54 +++++------ .../MC/Disassembler/X86/avx10.2-bf16-64.txt | 54 +++++------ .../MC/Disassembler/X86/avx10.2-com-ef-32.txt | 96 +++++++++---------- .../MC/Disassembler/X86/avx10.2-com-ef-64.txt | 96 +++++++++---------- llvm/test/MC/X86/avx10.2-bf16-32-att.s | 54 +++++------ llvm/test/MC/X86/avx10.2-bf16-32-intel.s | 54 +++++------ llvm/test/MC/X86/avx10.2-bf16-64-att.s | 54 +++++------ llvm/test/MC/X86/avx10.2-bf16-64-intel.s | 54 
+++++------ llvm/test/MC/X86/avx10.2-com-ef-32-att.s | 96 +++++++++---------- llvm/test/MC/X86/avx10.2-com-ef-32-intel.s | 96 +++++++++---------- llvm/test/MC/X86/avx10.2-com-ef-64-att.s | 96 +++++++++---------- llvm/test/MC/X86/avx10.2-com-ef-64-intel.s | 96 +++++++++---------- 15 files changed, 472 insertions(+), 472 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 9bb3e364f7c62..37d3b0a67cd33 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1468,7 +1468,7 @@ defm VRSQRT : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>, defm VRCP : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>, T_MAP6, PS, EVEX_CD8<16, CD8VF>; defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>, - T_MAP5, EVEX_CD8<16, CD8VF>; + T_MAP6, PS, EVEX_CD8<16, CD8VF>; // VSCALEFBF16 multiclass avx10_fp_scalef_bf16 opc, string OpcodeStr, @@ -1665,31 +1665,31 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, "vucomxsd", f64mem, loadf64, SSEPackedDouble>, - TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, "vucomxsh", f16mem, loadf16, SSEPackedSingle>, - T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, "vucomxss", f32mem, loadf32, SSEPackedSingle>, - TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd", SSEPackedDouble>, - TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512, "vcomxsh", SSEPackedSingle>, - T_MAP5, XD, 
EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512, "vcomxss", SSEPackedSingle>, - TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512, "vucomxsd", SSEPackedDouble>, - TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512, "vucomxsh", SSEPackedSingle>, - T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, "vucomxss", SSEPackedSingle>, - TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; } //------------------------------------------------- diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll index da17b995afedf..cbac76e9de273 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll @@ -164,7 +164,7 @@ define <32 x bfloat>@test_int_x86_avx512_mask_getexp_bf16_512(<32 x bfloat> %x0, ; X64-LABEL: test_int_x86_avx512_mask_getexp_bf16_512: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgetexpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0] +; X64-NEXT: vgetexpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x42,0xc0] ; X64-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8] ; X64-NEXT: vaddbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -172,7 +172,7 @@ define <32 x bfloat>@test_int_x86_avx512_mask_getexp_bf16_512(<32 x bfloat> %x0, ; X86-LABEL: test_int_x86_avx512_mask_getexp_bf16_512: ; X86: # %bb.0: ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgetexpbf16 %zmm0, 
%zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0] +; X86-NEXT: vgetexpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x42,0xc0] ; X86-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8] ; X86-NEXT: vaddbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] ; X86-NEXT: retl # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll index 06875dbe7cd23..ba32b2adc7999 100644 --- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll @@ -333,7 +333,7 @@ declare <16 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.256(<16 x bfloat>, <16 x define <8 x bfloat>@test_int_x86_avx512_getexp_bf16_128(<8 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_getexp_bf16_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vgetexpbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x42,0xc0] +; CHECK-NEXT: vgetexpbf16 %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x42,0xc0] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 -1) ret <8 x bfloat> %res @@ -343,14 +343,14 @@ define <8 x bfloat>@test_int_x86_avx512_mask_getexp_bf16_128(<8 x bfloat> %x0, < ; X64-LABEL: test_int_x86_avx512_mask_getexp_bf16_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgetexpbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8] +; X64-NEXT: vgetexpbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x42,0xc8] ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; X64-NEXT: retq # encoding: [0xc3] ; ; X86-LABEL: test_int_x86_avx512_mask_getexp_bf16_128: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgetexpbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8] +; X86-NEXT: 
vgetexpbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x42,0xc8] ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; X86-NEXT: retl # encoding: [0xc3] %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) @@ -361,13 +361,13 @@ define <8 x bfloat>@test_int_x86_avx512_maskz_getexp_bf16_128(<8 x bfloat> %x0, ; X64-LABEL: test_int_x86_avx512_maskz_getexp_bf16_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgetexpbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0] +; X64-NEXT: vgetexpbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x42,0xc0] ; X64-NEXT: retq # encoding: [0xc3] ; ; X86-LABEL: test_int_x86_avx512_maskz_getexp_bf16_128: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgetexpbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0] +; X86-NEXT: vgetexpbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x42,0xc0] ; X86-NEXT: retl # encoding: [0xc3] %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 %x2) ret <8 x bfloat> %res @@ -376,7 +376,7 @@ define <8 x bfloat>@test_int_x86_avx512_maskz_getexp_bf16_128(<8 x bfloat> %x0, define <16 x bfloat>@test_int_x86_avx512_getexp_bf16_256(<16 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_getexp_bf16_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vgetexpbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x42,0xc0] +; CHECK-NEXT: vgetexpbf16 %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x42,0xc0] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 -1) ret <16 x bfloat> %res @@ -386,14 +386,14 @@ define <16 x bfloat>@test_int_x86_avx512_mask_getexp_bf16_256(<16 x bfloat> %x0, ; 
X64-LABEL: test_int_x86_avx512_mask_getexp_bf16_256: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgetexpbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8] +; X64-NEXT: vgetexpbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x42,0xc8] ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; X64-NEXT: retq # encoding: [0xc3] ; ; X86-LABEL: test_int_x86_avx512_mask_getexp_bf16_256: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgetexpbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8] +; X86-NEXT: vgetexpbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x42,0xc8] ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; X86-NEXT: retl # encoding: [0xc3] %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) @@ -404,13 +404,13 @@ define <16 x bfloat>@test_int_x86_avx512_maskz_getexp_bf16_256(<16 x bfloat> %x0 ; X64-LABEL: test_int_x86_avx512_maskz_getexp_bf16_256: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgetexpbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0] +; X64-NEXT: vgetexpbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x42,0xc0] ; X64-NEXT: retq # encoding: [0xc3] ; ; X86-LABEL: test_int_x86_avx512_maskz_getexp_bf16_256: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgetexpbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0] +; X86-NEXT: vgetexpbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x42,0xc0] ; X86-NEXT: retl # encoding: [0xc3] %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.bf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 %x2) ret <16 x bfloat> %res diff --git 
a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt index a32e55e20e6b7..0db70d290e565 100644 --- a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt +++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt @@ -1719,111 +1719,111 @@ # ATT: vgetexpbf16 %xmm3, %xmm2 # INTEL: vgetexpbf16 xmm2, xmm3 -0x62,0xf5,0x7d,0x08,0x42,0xd3 +0x62,0xf6,0x7c,0x08,0x42,0xd3 # ATT: vgetexpbf16 %xmm3, %xmm2 {%k7} # INTEL: vgetexpbf16 xmm2 {k7}, xmm3 -0x62,0xf5,0x7d,0x0f,0x42,0xd3 +0x62,0xf6,0x7c,0x0f,0x42,0xd3 # ATT: vgetexpbf16 %xmm3, %xmm2 {%k7} {z} # INTEL: vgetexpbf16 xmm2 {k7} {z}, xmm3 -0x62,0xf5,0x7d,0x8f,0x42,0xd3 +0x62,0xf6,0x7c,0x8f,0x42,0xd3 # ATT: vgetexpbf16 %zmm3, %zmm2 # INTEL: vgetexpbf16 zmm2, zmm3 -0x62,0xf5,0x7d,0x48,0x42,0xd3 +0x62,0xf6,0x7c,0x48,0x42,0xd3 # ATT: vgetexpbf16 %zmm3, %zmm2 {%k7} # INTEL: vgetexpbf16 zmm2 {k7}, zmm3 -0x62,0xf5,0x7d,0x4f,0x42,0xd3 +0x62,0xf6,0x7c,0x4f,0x42,0xd3 # ATT: vgetexpbf16 %zmm3, %zmm2 {%k7} {z} # INTEL: vgetexpbf16 zmm2 {k7} {z}, zmm3 -0x62,0xf5,0x7d,0xcf,0x42,0xd3 +0x62,0xf6,0x7c,0xcf,0x42,0xd3 # ATT: vgetexpbf16 %ymm3, %ymm2 # INTEL: vgetexpbf16 ymm2, ymm3 -0x62,0xf5,0x7d,0x28,0x42,0xd3 +0x62,0xf6,0x7c,0x28,0x42,0xd3 # ATT: vgetexpbf16 %ymm3, %ymm2 {%k7} # INTEL: vgetexpbf16 ymm2 {k7}, ymm3 -0x62,0xf5,0x7d,0x2f,0x42,0xd3 +0x62,0xf6,0x7c,0x2f,0x42,0xd3 # ATT: vgetexpbf16 %ymm3, %ymm2 {%k7} {z} # INTEL: vgetexpbf16 ymm2 {k7} {z}, ymm3 -0x62,0xf5,0x7d,0xaf,0x42,0xd3 +0x62,0xf6,0x7c,0xaf,0x42,0xd3 # ATT: vgetexpbf16 268435456(%esp,%esi,8), %xmm2 # INTEL: vgetexpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] -0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf6,0x7c,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vgetexpbf16 291(%edi,%eax,4), %xmm2 {%k7} # INTEL: vgetexpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] -0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf6,0x7c,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vgetexpbf16 (%eax){1to8}, %xmm2 # 
INTEL: vgetexpbf16 xmm2, word ptr [eax]{1to8} -0x62,0xf5,0x7d,0x18,0x42,0x10 +0x62,0xf6,0x7c,0x18,0x42,0x10 # ATT: vgetexpbf16 -512(,%ebp,2), %xmm2 # INTEL: vgetexpbf16 xmm2, xmmword ptr [2*ebp - 512] -0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff +0x62,0xf6,0x7c,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff # ATT: vgetexpbf16 2032(%ecx), %xmm2 {%k7} {z} # INTEL: vgetexpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] -0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f +0x62,0xf6,0x7c,0x8f,0x42,0x51,0x7f # ATT: vgetexpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} # INTEL: vgetexpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} -0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80 +0x62,0xf6,0x7c,0x9f,0x42,0x52,0x80 # ATT: vgetexpbf16 268435456(%esp,%esi,8), %ymm2 # INTEL: vgetexpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] -0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf6,0x7c,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vgetexpbf16 291(%edi,%eax,4), %ymm2 {%k7} # INTEL: vgetexpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] -0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf6,0x7c,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vgetexpbf16 (%eax){1to16}, %ymm2 # INTEL: vgetexpbf16 ymm2, word ptr [eax]{1to16} -0x62,0xf5,0x7d,0x38,0x42,0x10 +0x62,0xf6,0x7c,0x38,0x42,0x10 # ATT: vgetexpbf16 -1024(,%ebp,2), %ymm2 # INTEL: vgetexpbf16 ymm2, ymmword ptr [2*ebp - 1024] -0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff +0x62,0xf6,0x7c,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff # ATT: vgetexpbf16 4064(%ecx), %ymm2 {%k7} {z} # INTEL: vgetexpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] -0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f +0x62,0xf6,0x7c,0xaf,0x42,0x51,0x7f # ATT: vgetexpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} # INTEL: vgetexpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} -0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80 +0x62,0xf6,0x7c,0xbf,0x42,0x52,0x80 # ATT: vgetexpbf16 268435456(%esp,%esi,8), %zmm2 # INTEL: vgetexpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] 
-0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf6,0x7c,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vgetexpbf16 291(%edi,%eax,4), %zmm2 {%k7} # INTEL: vgetexpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] -0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf6,0x7c,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vgetexpbf16 (%eax){1to32}, %zmm2 # INTEL: vgetexpbf16 zmm2, word ptr [eax]{1to32} -0x62,0xf5,0x7d,0x58,0x42,0x10 +0x62,0xf6,0x7c,0x58,0x42,0x10 # ATT: vgetexpbf16 -2048(,%ebp,2), %zmm2 # INTEL: vgetexpbf16 zmm2, zmmword ptr [2*ebp - 2048] -0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff +0x62,0xf6,0x7c,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff # ATT: vgetexpbf16 8128(%ecx), %zmm2 {%k7} {z} # INTEL: vgetexpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] -0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f +0x62,0xf6,0x7c,0xcf,0x42,0x51,0x7f # ATT: vgetexpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} # INTEL: vgetexpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} -0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80 +0x62,0xf6,0x7c,0xdf,0x42,0x52,0x80 # ATT: vgetmantbf16 $123, %zmm3, %zmm2 # INTEL: vgetmantbf16 zmm2, zmm3, 123 diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt index 1319c5cbd0362..197415e5ba329 100644 --- a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt +++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt @@ -1719,111 +1719,111 @@ # ATT: vgetexpbf16 %xmm23, %xmm22 # INTEL: vgetexpbf16 xmm22, xmm23 -0x62,0xa5,0x7d,0x08,0x42,0xf7 +0x62,0xa6,0x7c,0x08,0x42,0xf7 # ATT: vgetexpbf16 %xmm23, %xmm22 {%k7} # INTEL: vgetexpbf16 xmm22 {k7}, xmm23 -0x62,0xa5,0x7d,0x0f,0x42,0xf7 +0x62,0xa6,0x7c,0x0f,0x42,0xf7 # ATT: vgetexpbf16 %xmm23, %xmm22 {%k7} {z} # INTEL: vgetexpbf16 xmm22 {k7} {z}, xmm23 -0x62,0xa5,0x7d,0x8f,0x42,0xf7 +0x62,0xa6,0x7c,0x8f,0x42,0xf7 # ATT: vgetexpbf16 %zmm23, %zmm22 # INTEL: vgetexpbf16 zmm22, zmm23 -0x62,0xa5,0x7d,0x48,0x42,0xf7 +0x62,0xa6,0x7c,0x48,0x42,0xf7 # ATT: 
vgetexpbf16 %zmm23, %zmm22 {%k7} # INTEL: vgetexpbf16 zmm22 {k7}, zmm23 -0x62,0xa5,0x7d,0x4f,0x42,0xf7 +0x62,0xa6,0x7c,0x4f,0x42,0xf7 # ATT: vgetexpbf16 %zmm23, %zmm22 {%k7} {z} # INTEL: vgetexpbf16 zmm22 {k7} {z}, zmm23 -0x62,0xa5,0x7d,0xcf,0x42,0xf7 +0x62,0xa6,0x7c,0xcf,0x42,0xf7 # ATT: vgetexpbf16 %ymm23, %ymm22 # INTEL: vgetexpbf16 ymm22, ymm23 -0x62,0xa5,0x7d,0x28,0x42,0xf7 +0x62,0xa6,0x7c,0x28,0x42,0xf7 # ATT: vgetexpbf16 %ymm23, %ymm22 {%k7} # INTEL: vgetexpbf16 ymm22 {k7}, ymm23 -0x62,0xa5,0x7d,0x2f,0x42,0xf7 +0x62,0xa6,0x7c,0x2f,0x42,0xf7 # ATT: vgetexpbf16 %ymm23, %ymm22 {%k7} {z} # INTEL: vgetexpbf16 ymm22 {k7} {z}, ymm23 -0x62,0xa5,0x7d,0xaf,0x42,0xf7 +0x62,0xa6,0x7c,0xaf,0x42,0xf7 # ATT: vgetexpbf16 268435456(%rbp,%r14,8), %xmm22 # INTEL: vgetexpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] -0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa6,0x7c,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vgetexpbf16 291(%r8,%rax,4), %xmm22 {%k7} # INTEL: vgetexpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] -0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc6,0x7c,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vgetexpbf16 (%rip){1to8}, %xmm22 # INTEL: vgetexpbf16 xmm22, word ptr [rip]{1to8} -0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00 +0x62,0xe6,0x7c,0x18,0x42,0x35,0x00,0x00,0x00,0x00 # ATT: vgetexpbf16 -512(,%rbp,2), %xmm22 # INTEL: vgetexpbf16 xmm22, xmmword ptr [2*rbp - 512] -0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff +0x62,0xe6,0x7c,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff # ATT: vgetexpbf16 2032(%rcx), %xmm22 {%k7} {z} # INTEL: vgetexpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] -0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f +0x62,0xe6,0x7c,0x8f,0x42,0x71,0x7f # ATT: vgetexpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} # INTEL: vgetexpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} -0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80 +0x62,0xe6,0x7c,0x9f,0x42,0x72,0x80 # ATT: vgetexpbf16 268435456(%rbp,%r14,8), %ymm22 # INTEL: vgetexpbf16 
ymm22, ymmword ptr [rbp + 8*r14 + 268435456] -0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa6,0x7c,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vgetexpbf16 291(%r8,%rax,4), %ymm22 {%k7} # INTEL: vgetexpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] -0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc6,0x7c,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vgetexpbf16 (%rip){1to16}, %ymm22 # INTEL: vgetexpbf16 ymm22, word ptr [rip]{1to16} -0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00 +0x62,0xe6,0x7c,0x38,0x42,0x35,0x00,0x00,0x00,0x00 # ATT: vgetexpbf16 -1024(,%rbp,2), %ymm22 # INTEL: vgetexpbf16 ymm22, ymmword ptr [2*rbp - 1024] -0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff +0x62,0xe6,0x7c,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff # ATT: vgetexpbf16 4064(%rcx), %ymm22 {%k7} {z} # INTEL: vgetexpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] -0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f +0x62,0xe6,0x7c,0xaf,0x42,0x71,0x7f # ATT: vgetexpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} # INTEL: vgetexpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} -0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80 +0x62,0xe6,0x7c,0xbf,0x42,0x72,0x80 # ATT: vgetexpbf16 268435456(%rbp,%r14,8), %zmm22 # INTEL: vgetexpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] -0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa6,0x7c,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vgetexpbf16 291(%r8,%rax,4), %zmm22 {%k7} # INTEL: vgetexpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] -0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc6,0x7c,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vgetexpbf16 (%rip){1to32}, %zmm22 # INTEL: vgetexpbf16 zmm22, word ptr [rip]{1to32} -0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00 +0x62,0xe6,0x7c,0x58,0x42,0x35,0x00,0x00,0x00,0x00 # ATT: vgetexpbf16 -2048(,%rbp,2), %zmm22 # INTEL: vgetexpbf16 zmm22, zmmword ptr [2*rbp - 2048] -0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff 
+0x62,0xe6,0x7c,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff # ATT: vgetexpbf16 8128(%rcx), %zmm22 {%k7} {z} # INTEL: vgetexpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] -0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f +0x62,0xe6,0x7c,0xcf,0x42,0x71,0x7f # ATT: vgetexpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} # INTEL: vgetexpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} -0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80 +0x62,0xe6,0x7c,0xdf,0x42,0x72,0x80 # ATT: vgetmantbf16 $123, %zmm23, %zmm22 # INTEL: vgetmantbf16 zmm22, zmm23, 123 diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt index e7adacbbf88c8..ecdc75979e8df 100644 --- a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt +++ b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt @@ -3,193 +3,193 @@ # ATT: vcomxsd %xmm3, %xmm2 # INTEL: vcomxsd xmm2, xmm3 -0x62,0xf1,0xfe,0x08,0x2f,0xd3 +0x62,0xf1,0xff,0x08,0x2f,0xd3 # ATT: vcomxsd {sae}, %xmm3, %xmm2 # INTEL: vcomxsd xmm2, xmm3, {sae} -0x62,0xf1,0xfe,0x18,0x2f,0xd3 +0x62,0xf1,0xff,0x18,0x2f,0xd3 # ATT: vcomxsd 268435456(%esp,%esi,8), %xmm2 # INTEL: vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456] -0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf1,0xff,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vcomxsd 291(%edi,%eax,4), %xmm2 # INTEL: vcomxsd xmm2, qword ptr [edi + 4*eax + 291] -0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf1,0xff,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vcomxsd (%eax), %xmm2 # INTEL: vcomxsd xmm2, qword ptr [eax] -0x62,0xf1,0xfe,0x08,0x2f,0x10 +0x62,0xf1,0xff,0x08,0x2f,0x10 # ATT: vcomxsd -256(,%ebp,2), %xmm2 # INTEL: vcomxsd xmm2, qword ptr [2*ebp - 256] -0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff +0x62,0xf1,0xff,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff # ATT: vcomxsd 1016(%ecx), %xmm2 # INTEL: vcomxsd xmm2, qword ptr [ecx + 1016] -0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f +0x62,0xf1,0xff,0x08,0x2f,0x51,0x7f # ATT: vcomxsd -1024(%edx), %xmm2 # INTEL: 
vcomxsd xmm2, qword ptr [edx - 1024] -0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80 +0x62,0xf1,0xff,0x08,0x2f,0x52,0x80 # ATT: vcomxsh %xmm3, %xmm2 # INTEL: vcomxsh xmm2, xmm3 -0x62,0xf5,0x7f,0x08,0x2f,0xd3 +0x62,0xf5,0x7e,0x08,0x2f,0xd3 # ATT: vcomxsh {sae}, %xmm3, %xmm2 # INTEL: vcomxsh xmm2, xmm3, {sae} -0x62,0xf5,0x7f,0x18,0x2f,0xd3 +0x62,0xf5,0x7e,0x18,0x2f,0xd3 # ATT: vcomxsh 268435456(%esp,%esi,8), %xmm2 # INTEL: vcomxsh xmm2, word ptr [esp + 8*esi + 268435456] -0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf5,0x7e,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vcomxsh 291(%edi,%eax,4), %xmm2 # INTEL: vcomxsh xmm2, word ptr [edi + 4*eax + 291] -0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf5,0x7e,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vcomxsh (%eax), %xmm2 # INTEL: vcomxsh xmm2, word ptr [eax] -0x62,0xf5,0x7f,0x08,0x2f,0x10 +0x62,0xf5,0x7e,0x08,0x2f,0x10 # ATT: vcomxsh -64(,%ebp,2), %xmm2 # INTEL: vcomxsh xmm2, word ptr [2*ebp - 64] -0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff +0x62,0xf5,0x7e,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff # ATT: vcomxsh 254(%ecx), %xmm2 # INTEL: vcomxsh xmm2, word ptr [ecx + 254] -0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f +0x62,0xf5,0x7e,0x08,0x2f,0x51,0x7f # ATT: vcomxsh -256(%edx), %xmm2 # INTEL: vcomxsh xmm2, word ptr [edx - 256] -0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80 +0x62,0xf5,0x7e,0x08,0x2f,0x52,0x80 # ATT: vcomxss %xmm3, %xmm2 # INTEL: vcomxss xmm2, xmm3 -0x62,0xf1,0x7f,0x08,0x2f,0xd3 +0x62,0xf1,0x7e,0x08,0x2f,0xd3 # ATT: vcomxss {sae}, %xmm3, %xmm2 # INTEL: vcomxss xmm2, xmm3, {sae} -0x62,0xf1,0x7f,0x18,0x2f,0xd3 +0x62,0xf1,0x7e,0x18,0x2f,0xd3 # ATT: vcomxss 268435456(%esp,%esi,8), %xmm2 # INTEL: vcomxss xmm2, dword ptr [esp + 8*esi + 268435456] -0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf1,0x7e,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vcomxss 291(%edi,%eax,4), %xmm2 # INTEL: vcomxss xmm2, dword ptr [edi + 4*eax + 291] 
-0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf1,0x7e,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vcomxss (%eax), %xmm2 # INTEL: vcomxss xmm2, dword ptr [eax] -0x62,0xf1,0x7f,0x08,0x2f,0x10 +0x62,0xf1,0x7e,0x08,0x2f,0x10 # ATT: vcomxss -128(,%ebp,2), %xmm2 # INTEL: vcomxss xmm2, dword ptr [2*ebp - 128] -0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff +0x62,0xf1,0x7e,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff # ATT: vcomxss 508(%ecx), %xmm2 # INTEL: vcomxss xmm2, dword ptr [ecx + 508] -0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f +0x62,0xf1,0x7e,0x08,0x2f,0x51,0x7f # ATT: vcomxss -512(%edx), %xmm2 # INTEL: vcomxss xmm2, dword ptr [edx - 512] -0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80 +0x62,0xf1,0x7e,0x08,0x2f,0x52,0x80 # ATT: vucomxsd %xmm3, %xmm2 # INTEL: vucomxsd xmm2, xmm3 -0x62,0xf1,0xfe,0x08,0x2e,0xd3 +0x62,0xf1,0xff,0x08,0x2e,0xd3 # ATT: vucomxsd {sae}, %xmm3, %xmm2 # INTEL: vucomxsd xmm2, xmm3, {sae} -0x62,0xf1,0xfe,0x18,0x2e,0xd3 +0x62,0xf1,0xff,0x18,0x2e,0xd3 # ATT: vucomxsd 268435456(%esp,%esi,8), %xmm2 # INTEL: vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456] -0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf1,0xff,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vucomxsd 291(%edi,%eax,4), %xmm2 # INTEL: vucomxsd xmm2, qword ptr [edi + 4*eax + 291] -0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf1,0xff,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vucomxsd (%eax), %xmm2 # INTEL: vucomxsd xmm2, qword ptr [eax] -0x62,0xf1,0xfe,0x08,0x2e,0x10 +0x62,0xf1,0xff,0x08,0x2e,0x10 # ATT: vucomxsd -256(,%ebp,2), %xmm2 # INTEL: vucomxsd xmm2, qword ptr [2*ebp - 256] -0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff +0x62,0xf1,0xff,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff # ATT: vucomxsd 1016(%ecx), %xmm2 # INTEL: vucomxsd xmm2, qword ptr [ecx + 1016] -0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f +0x62,0xf1,0xff,0x08,0x2e,0x51,0x7f # ATT: vucomxsd -1024(%edx), %xmm2 # INTEL: vucomxsd xmm2, qword ptr [edx - 1024] 
-0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80 +0x62,0xf1,0xff,0x08,0x2e,0x52,0x80 # ATT: vucomxsh %xmm3, %xmm2 # INTEL: vucomxsh xmm2, xmm3 -0x62,0xf5,0x7f,0x08,0x2e,0xd3 +0x62,0xf5,0x7e,0x08,0x2e,0xd3 # ATT: vucomxsh {sae}, %xmm3, %xmm2 # INTEL: vucomxsh xmm2, xmm3, {sae} -0x62,0xf5,0x7f,0x18,0x2e,0xd3 +0x62,0xf5,0x7e,0x18,0x2e,0xd3 # ATT: vucomxsh 268435456(%esp,%esi,8), %xmm2 # INTEL: vucomxsh xmm2, word ptr [esp + 8*esi + 268435456] -0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf5,0x7e,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vucomxsh 291(%edi,%eax,4), %xmm2 # INTEL: vucomxsh xmm2, word ptr [edi + 4*eax + 291] -0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 +0x62,0xf5,0x7e,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vucomxsh (%eax), %xmm2 # INTEL: vucomxsh xmm2, word ptr [eax] -0x62,0xf5,0x7f,0x08,0x2e,0x10 +0x62,0xf5,0x7e,0x08,0x2e,0x10 # ATT: vucomxsh -64(,%ebp,2), %xmm2 # INTEL: vucomxsh xmm2, word ptr [2*ebp - 64] -0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff +0x62,0xf5,0x7e,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff # ATT: vucomxsh 254(%ecx), %xmm2 # INTEL: vucomxsh xmm2, word ptr [ecx + 254] -0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f +0x62,0xf5,0x7e,0x08,0x2e,0x51,0x7f # ATT: vucomxsh -256(%edx), %xmm2 # INTEL: vucomxsh xmm2, word ptr [edx - 256] -0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80 +0x62,0xf5,0x7e,0x08,0x2e,0x52,0x80 # ATT: vucomxss %xmm3, %xmm2 # INTEL: vucomxss xmm2, xmm3 -0x62,0xf1,0x7f,0x08,0x2e,0xd3 +0x62,0xf1,0x7e,0x08,0x2e,0xd3 # ATT: vucomxss {sae}, %xmm3, %xmm2 # INTEL: vucomxss xmm2, xmm3, {sae} -0x62,0xf1,0x7f,0x18,0x2e,0xd3 +0x62,0xf1,0x7e,0x18,0x2e,0xd3 # ATT: vucomxss 268435456(%esp,%esi,8), %xmm2 # INTEL: vucomxss xmm2, dword ptr [esp + 8*esi + 268435456] -0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 +0x62,0xf1,0x7e,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 # ATT: vucomxss 291(%edi,%eax,4), %xmm2 # INTEL: vucomxss xmm2, dword ptr [edi + 4*eax + 291] -0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 
+0x62,0xf1,0x7e,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 # ATT: vucomxss (%eax), %xmm2 # INTEL: vucomxss xmm2, dword ptr [eax] -0x62,0xf1,0x7f,0x08,0x2e,0x10 +0x62,0xf1,0x7e,0x08,0x2e,0x10 # ATT: vucomxss -128(,%ebp,2), %xmm2 # INTEL: vucomxss xmm2, dword ptr [2*ebp - 128] -0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff +0x62,0xf1,0x7e,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff # ATT: vucomxss 508(%ecx), %xmm2 # INTEL: vucomxss xmm2, dword ptr [ecx + 508] -0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f +0x62,0xf1,0x7e,0x08,0x2e,0x51,0x7f # ATT: vucomxss -512(%edx), %xmm2 # INTEL: vucomxss xmm2, dword ptr [edx - 512] -0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80 +0x62,0xf1,0x7e,0x08,0x2e,0x52,0x80 diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt index ea580fe8d5083..e01e762d12aaf 100644 --- a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt +++ b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt @@ -3,193 +3,193 @@ # ATT: vcomxsd %xmm23, %xmm22 # INTEL: vcomxsd xmm22, xmm23 -0x62,0xa1,0xfe,0x08,0x2f,0xf7 +0x62,0xa1,0xff,0x08,0x2f,0xf7 # ATT: vcomxsd {sae}, %xmm23, %xmm22 # INTEL: vcomxsd xmm22, xmm23, {sae} -0x62,0xa1,0xfe,0x18,0x2f,0xf7 +0x62,0xa1,0xff,0x18,0x2f,0xf7 # ATT: vcomxsd 268435456(%rbp,%r14,8), %xmm22 # INTEL: vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] -0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa1,0xff,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vcomxsd 291(%r8,%rax,4), %xmm22 # INTEL: vcomxsd xmm22, qword ptr [r8 + 4*rax + 291] -0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc1,0xff,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vcomxsd (%rip), %xmm22 # INTEL: vcomxsd xmm22, qword ptr [rip] -0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 +0x62,0xe1,0xff,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 # ATT: vcomxsd -256(,%rbp,2), %xmm22 # INTEL: vcomxsd xmm22, qword ptr [2*rbp - 256] -0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff 
+0x62,0xe1,0xff,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff # ATT: vcomxsd 1016(%rcx), %xmm22 # INTEL: vcomxsd xmm22, qword ptr [rcx + 1016] -0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f +0x62,0xe1,0xff,0x08,0x2f,0x71,0x7f # ATT: vcomxsd -1024(%rdx), %xmm22 # INTEL: vcomxsd xmm22, qword ptr [rdx - 1024] -0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80 +0x62,0xe1,0xff,0x08,0x2f,0x72,0x80 # ATT: vcomxsh %xmm23, %xmm22 # INTEL: vcomxsh xmm22, xmm23 -0x62,0xa5,0x7f,0x08,0x2f,0xf7 +0x62,0xa5,0x7e,0x08,0x2f,0xf7 # ATT: vcomxsh {sae}, %xmm23, %xmm22 # INTEL: vcomxsh xmm22, xmm23, {sae} -0x62,0xa5,0x7f,0x18,0x2f,0xf7 +0x62,0xa5,0x7e,0x18,0x2f,0xf7 # ATT: vcomxsh 268435456(%rbp,%r14,8), %xmm22 # INTEL: vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] -0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa5,0x7e,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vcomxsh 291(%r8,%rax,4), %xmm22 # INTEL: vcomxsh xmm22, word ptr [r8 + 4*rax + 291] -0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc5,0x7e,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vcomxsh (%rip), %xmm22 # INTEL: vcomxsh xmm22, word ptr [rip] -0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 +0x62,0xe5,0x7e,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 # ATT: vcomxsh -64(,%rbp,2), %xmm22 # INTEL: vcomxsh xmm22, word ptr [2*rbp - 64] -0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff +0x62,0xe5,0x7e,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff # ATT: vcomxsh 254(%rcx), %xmm22 # INTEL: vcomxsh xmm22, word ptr [rcx + 254] -0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f +0x62,0xe5,0x7e,0x08,0x2f,0x71,0x7f # ATT: vcomxsh -256(%rdx), %xmm22 # INTEL: vcomxsh xmm22, word ptr [rdx - 256] -0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80 +0x62,0xe5,0x7e,0x08,0x2f,0x72,0x80 # ATT: vcomxss %xmm23, %xmm22 # INTEL: vcomxss xmm22, xmm23 -0x62,0xa1,0x7f,0x08,0x2f,0xf7 +0x62,0xa1,0x7e,0x08,0x2f,0xf7 # ATT: vcomxss {sae}, %xmm23, %xmm22 # INTEL: vcomxss xmm22, xmm23, {sae} -0x62,0xa1,0x7f,0x18,0x2f,0xf7 +0x62,0xa1,0x7e,0x18,0x2f,0xf7 # ATT: vcomxss 
268435456(%rbp,%r14,8), %xmm22 # INTEL: vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] -0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa1,0x7e,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vcomxss 291(%r8,%rax,4), %xmm22 # INTEL: vcomxss xmm22, dword ptr [r8 + 4*rax + 291] -0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc1,0x7e,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vcomxss (%rip), %xmm22 # INTEL: vcomxss xmm22, dword ptr [rip] -0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 +0x62,0xe1,0x7e,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 # ATT: vcomxss -128(,%rbp,2), %xmm22 # INTEL: vcomxss xmm22, dword ptr [2*rbp - 128] -0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff +0x62,0xe1,0x7e,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff # ATT: vcomxss 508(%rcx), %xmm22 # INTEL: vcomxss xmm22, dword ptr [rcx + 508] -0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f +0x62,0xe1,0x7e,0x08,0x2f,0x71,0x7f # ATT: vcomxss -512(%rdx), %xmm22 # INTEL: vcomxss xmm22, dword ptr [rdx - 512] -0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80 +0x62,0xe1,0x7e,0x08,0x2f,0x72,0x80 # ATT: vucomxsd %xmm23, %xmm22 # INTEL: vucomxsd xmm22, xmm23 -0x62,0xa1,0xfe,0x08,0x2e,0xf7 +0x62,0xa1,0xff,0x08,0x2e,0xf7 # ATT: vucomxsd {sae}, %xmm23, %xmm22 # INTEL: vucomxsd xmm22, xmm23, {sae} -0x62,0xa1,0xfe,0x18,0x2e,0xf7 +0x62,0xa1,0xff,0x18,0x2e,0xf7 # ATT: vucomxsd 268435456(%rbp,%r14,8), %xmm22 # INTEL: vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] -0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa1,0xff,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vucomxsd 291(%r8,%rax,4), %xmm22 # INTEL: vucomxsd xmm22, qword ptr [r8 + 4*rax + 291] -0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc1,0xff,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vucomxsd (%rip), %xmm22 # INTEL: vucomxsd xmm22, qword ptr [rip] -0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 +0x62,0xe1,0xff,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 # ATT: vucomxsd -256(,%rbp,2), %xmm22 # INTEL: vucomxsd 
xmm22, qword ptr [2*rbp - 256] -0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff +0x62,0xe1,0xff,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff # ATT: vucomxsd 1016(%rcx), %xmm22 # INTEL: vucomxsd xmm22, qword ptr [rcx + 1016] -0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f +0x62,0xe1,0xff,0x08,0x2e,0x71,0x7f # ATT: vucomxsd -1024(%rdx), %xmm22 # INTEL: vucomxsd xmm22, qword ptr [rdx - 1024] -0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80 +0x62,0xe1,0xff,0x08,0x2e,0x72,0x80 # ATT: vucomxsh %xmm23, %xmm22 # INTEL: vucomxsh xmm22, xmm23 -0x62,0xa5,0x7f,0x08,0x2e,0xf7 +0x62,0xa5,0x7e,0x08,0x2e,0xf7 # ATT: vucomxsh {sae}, %xmm23, %xmm22 # INTEL: vucomxsh xmm22, xmm23, {sae} -0x62,0xa5,0x7f,0x18,0x2e,0xf7 +0x62,0xa5,0x7e,0x18,0x2e,0xf7 # ATT: vucomxsh 268435456(%rbp,%r14,8), %xmm22 # INTEL: vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] -0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa5,0x7e,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vucomxsh 291(%r8,%rax,4), %xmm22 # INTEL: vucomxsh xmm22, word ptr [r8 + 4*rax + 291] -0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc5,0x7e,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vucomxsh (%rip), %xmm22 # INTEL: vucomxsh xmm22, word ptr [rip] -0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 +0x62,0xe5,0x7e,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 # ATT: vucomxsh -64(,%rbp,2), %xmm22 # INTEL: vucomxsh xmm22, word ptr [2*rbp - 64] -0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff +0x62,0xe5,0x7e,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff # ATT: vucomxsh 254(%rcx), %xmm22 # INTEL: vucomxsh xmm22, word ptr [rcx + 254] -0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f +0x62,0xe5,0x7e,0x08,0x2e,0x71,0x7f # ATT: vucomxsh -256(%rdx), %xmm22 # INTEL: vucomxsh xmm22, word ptr [rdx - 256] -0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80 +0x62,0xe5,0x7e,0x08,0x2e,0x72,0x80 # ATT: vucomxss %xmm23, %xmm22 # INTEL: vucomxss xmm22, xmm23 -0x62,0xa1,0x7f,0x08,0x2e,0xf7 +0x62,0xa1,0x7e,0x08,0x2e,0xf7 # ATT: vucomxss {sae}, %xmm23, %xmm22 # INTEL: vucomxss 
xmm22, xmm23, {sae} -0x62,0xa1,0x7f,0x18,0x2e,0xf7 +0x62,0xa1,0x7e,0x18,0x2e,0xf7 # ATT: vucomxss 268435456(%rbp,%r14,8), %xmm22 # INTEL: vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] -0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 +0x62,0xa1,0x7e,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 # ATT: vucomxss 291(%r8,%rax,4), %xmm22 # INTEL: vucomxss xmm22, dword ptr [r8 + 4*rax + 291] -0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 +0x62,0xc1,0x7e,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 # ATT: vucomxss (%rip), %xmm22 # INTEL: vucomxss xmm22, dword ptr [rip] -0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 +0x62,0xe1,0x7e,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 # ATT: vucomxss -128(,%rbp,2), %xmm22 # INTEL: vucomxss xmm22, dword ptr [2*rbp - 128] -0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff +0x62,0xe1,0x7e,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff # ATT: vucomxss 508(%rcx), %xmm22 # INTEL: vucomxss xmm22, dword ptr [rcx + 508] -0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f +0x62,0xe1,0x7e,0x08,0x2e,0x71,0x7f # ATT: vucomxss -512(%rdx), %xmm22 # INTEL: vucomxss xmm22, dword ptr [rdx - 512] -0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80 +0x62,0xe1,0x7e,0x08,0x2e,0x72,0x80 diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-att.s b/llvm/test/MC/X86/avx10.2-bf16-32-att.s index e1e82623d838c..88433d7a3411a 100644 --- a/llvm/test/MC/X86/avx10.2-bf16-32-att.s +++ b/llvm/test/MC/X86/avx10.2-bf16-32-att.s @@ -1717,111 +1717,111 @@ vfpclassbf16 $123, -256(%edx){1to32}, %k5 {%k7} // CHECK: vgetexpbf16 %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x42,0xd3] vgetexpbf16 %xmm3, %xmm2 // CHECK: vgetexpbf16 %xmm3, %xmm2 {%k7} -// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x42,0xd3] vgetexpbf16 %xmm3, %xmm2 {%k7} // CHECK: vgetexpbf16 %xmm3, %xmm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x42,0xd3] vgetexpbf16 
%xmm3, %xmm2 {%k7} {z} // CHECK: vgetexpbf16 %zmm3, %zmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x42,0xd3] vgetexpbf16 %zmm3, %zmm2 // CHECK: vgetexpbf16 %zmm3, %zmm2 {%k7} -// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x42,0xd3] vgetexpbf16 %zmm3, %zmm2 {%k7} // CHECK: vgetexpbf16 %zmm3, %zmm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x42,0xd3] vgetexpbf16 %zmm3, %zmm2 {%k7} {z} // CHECK: vgetexpbf16 %ymm3, %ymm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x42,0xd3] vgetexpbf16 %ymm3, %ymm2 // CHECK: vgetexpbf16 %ymm3, %ymm2 {%k7} -// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x42,0xd3] vgetexpbf16 %ymm3, %ymm2 {%k7} // CHECK: vgetexpbf16 %ymm3, %ymm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x42,0xd3] vgetexpbf16 %ymm3, %ymm2 {%k7} {z} // CHECK: vgetexpbf16 268435456(%esp,%esi,8), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] vgetexpbf16 268435456(%esp,%esi,8), %xmm2 // CHECK: vgetexpbf16 291(%edi,%eax,4), %xmm2 {%k7} -// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] vgetexpbf16 291(%edi,%eax,4), %xmm2 {%k7} // CHECK: vgetexpbf16 (%eax){1to8}, %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x42,0x10] vgetexpbf16 (%eax){1to8}, %xmm2 // CHECK: vgetexpbf16 -512(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff] vgetexpbf16 -512(,%ebp,2), %xmm2 // 
CHECK: vgetexpbf16 2032(%ecx), %xmm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f] +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x42,0x51,0x7f] vgetexpbf16 2032(%ecx), %xmm2 {%k7} {z} // CHECK: vgetexpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80] +// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x42,0x52,0x80] vgetexpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} // CHECK: vgetexpbf16 268435456(%esp,%esi,8), %ymm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] vgetexpbf16 268435456(%esp,%esi,8), %ymm2 // CHECK: vgetexpbf16 291(%edi,%eax,4), %ymm2 {%k7} -// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] vgetexpbf16 291(%edi,%eax,4), %ymm2 {%k7} // CHECK: vgetexpbf16 (%eax){1to16}, %ymm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x42,0x10] vgetexpbf16 (%eax){1to16}, %ymm2 // CHECK: vgetexpbf16 -1024(,%ebp,2), %ymm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff] vgetexpbf16 -1024(,%ebp,2), %ymm2 // CHECK: vgetexpbf16 4064(%ecx), %ymm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f] +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x42,0x51,0x7f] vgetexpbf16 4064(%ecx), %ymm2 {%k7} {z} // CHECK: vgetexpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80] +// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x42,0x52,0x80] vgetexpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} // CHECK: vgetexpbf16 268435456(%esp,%esi,8), %zmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] vgetexpbf16 
268435456(%esp,%esi,8), %zmm2 // CHECK: vgetexpbf16 291(%edi,%eax,4), %zmm2 {%k7} -// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] vgetexpbf16 291(%edi,%eax,4), %zmm2 {%k7} // CHECK: vgetexpbf16 (%eax){1to32}, %zmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x42,0x10] vgetexpbf16 (%eax){1to32}, %zmm2 // CHECK: vgetexpbf16 -2048(,%ebp,2), %zmm2 -// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff] vgetexpbf16 -2048(,%ebp,2), %zmm2 // CHECK: vgetexpbf16 8128(%ecx), %zmm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f] +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x42,0x51,0x7f] vgetexpbf16 8128(%ecx), %zmm2 {%k7} {z} // CHECK: vgetexpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} -// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80] +// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x42,0x52,0x80] vgetexpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} // CHECK: vgetmantbf16 $123, %zmm3, %zmm2 diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-intel.s b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s index d2e9440ba9c34..7e1d0c305336a 100644 --- a/llvm/test/MC/X86/avx10.2-bf16-32-intel.s +++ b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s @@ -1717,111 +1717,111 @@ vfpclassbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123 // CHECK: vgetexpbf16 xmm2, xmm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x42,0xd3] vgetexpbf16 xmm2, xmm3 // CHECK: vgetexpbf16 xmm2 {k7}, xmm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x42,0xd3] vgetexpbf16 xmm2 {k7}, xmm3 // CHECK: vgetexpbf16 xmm2 {k7} {z}, xmm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x42,0xd3] vgetexpbf16 xmm2 {k7} {z}, xmm3 // 
CHECK: vgetexpbf16 zmm2, zmm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x42,0xd3] vgetexpbf16 zmm2, zmm3 // CHECK: vgetexpbf16 zmm2 {k7}, zmm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x42,0xd3] vgetexpbf16 zmm2 {k7}, zmm3 // CHECK: vgetexpbf16 zmm2 {k7} {z}, zmm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x42,0xd3] vgetexpbf16 zmm2 {k7} {z}, zmm3 // CHECK: vgetexpbf16 ymm2, ymm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x42,0xd3] vgetexpbf16 ymm2, ymm3 // CHECK: vgetexpbf16 ymm2 {k7}, ymm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x42,0xd3] vgetexpbf16 ymm2 {k7}, ymm3 // CHECK: vgetexpbf16 ymm2 {k7} {z}, ymm3 -// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3] +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x42,0xd3] vgetexpbf16 ymm2 {k7} {z}, ymm3 // CHECK: vgetexpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] vgetexpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] // CHECK: vgetexpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] vgetexpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] // CHECK: vgetexpbf16 xmm2, word ptr [eax]{1to8} -// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x42,0x10] vgetexpbf16 xmm2, word ptr [eax]{1to8} // CHECK: vgetexpbf16 xmm2, xmmword ptr [2*ebp - 512] -// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff] vgetexpbf16 
xmm2, xmmword ptr [2*ebp - 512] // CHECK: vgetexpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] -// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f] +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x42,0x51,0x7f] vgetexpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] // CHECK: vgetexpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} -// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80] +// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x42,0x52,0x80] vgetexpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} // CHECK: vgetexpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] vgetexpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] // CHECK: vgetexpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] vgetexpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] // CHECK: vgetexpbf16 ymm2, word ptr [eax]{1to16} -// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x42,0x10] vgetexpbf16 ymm2, word ptr [eax]{1to16} // CHECK: vgetexpbf16 ymm2, ymmword ptr [2*ebp - 1024] -// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff] vgetexpbf16 ymm2, ymmword ptr [2*ebp - 1024] // CHECK: vgetexpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] -// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f] +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x42,0x51,0x7f] vgetexpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] // CHECK: vgetexpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} -// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80] +// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x42,0x52,0x80] vgetexpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} // CHECK: vgetexpbf16 zmm2, 
zmmword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] vgetexpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] // CHECK: vgetexpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] vgetexpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] // CHECK: vgetexpbf16 zmm2, word ptr [eax]{1to32} -// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10] +// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x42,0x10] vgetexpbf16 zmm2, word ptr [eax]{1to32} // CHECK: vgetexpbf16 zmm2, zmmword ptr [2*ebp - 2048] -// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff] vgetexpbf16 zmm2, zmmword ptr [2*ebp - 2048] // CHECK: vgetexpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] -// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f] +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x42,0x51,0x7f] vgetexpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] // CHECK: vgetexpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} -// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80] +// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x42,0x52,0x80] vgetexpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} // CHECK: vgetmantbf16 zmm2, zmm3, 123 diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-att.s b/llvm/test/MC/X86/avx10.2-bf16-64-att.s index 67d6f3a531dfe..0eb10fbf6d86f 100644 --- a/llvm/test/MC/X86/avx10.2-bf16-64-att.s +++ b/llvm/test/MC/X86/avx10.2-bf16-64-att.s @@ -1717,111 +1717,111 @@ vfpclassbf16 $123, -256(%rdx){1to32}, %k5 {%k7} // CHECK: vgetexpbf16 %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x42,0xf7] vgetexpbf16 %xmm23, %xmm22 // CHECK: vgetexpbf16 %xmm23, %xmm22 {%k7} -// 
CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x42,0xf7] vgetexpbf16 %xmm23, %xmm22 {%k7} // CHECK: vgetexpbf16 %xmm23, %xmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x42,0xf7] vgetexpbf16 %xmm23, %xmm22 {%k7} {z} // CHECK: vgetexpbf16 %zmm23, %zmm22 -// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x42,0xf7] vgetexpbf16 %zmm23, %zmm22 // CHECK: vgetexpbf16 %zmm23, %zmm22 {%k7} -// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x42,0xf7] vgetexpbf16 %zmm23, %zmm22 {%k7} // CHECK: vgetexpbf16 %zmm23, %zmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x42,0xf7] vgetexpbf16 %zmm23, %zmm22 {%k7} {z} // CHECK: vgetexpbf16 %ymm23, %ymm22 -// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x42,0xf7] vgetexpbf16 %ymm23, %ymm22 // CHECK: vgetexpbf16 %ymm23, %ymm22 {%k7} -// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x42,0xf7] vgetexpbf16 %ymm23, %ymm22 {%k7} // CHECK: vgetexpbf16 %ymm23, %ymm22 {%k7} {z} -// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x42,0xf7] vgetexpbf16 %ymm23, %ymm22 {%k7} {z} // CHECK: vgetexpbf16 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] vgetexpbf16 268435456(%rbp,%r14,8), %xmm22 // CHECK: vgetexpbf16 291(%r8,%rax,4), %xmm22 {%k7} -// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] vgetexpbf16 291(%r8,%rax,4), %xmm22 {%k7} // CHECK: vgetexpbf16 (%rip){1to8}, %xmm22 -// CHECK: encoding: 
[0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x42,0x35,0x00,0x00,0x00,0x00] vgetexpbf16 (%rip){1to8}, %xmm22 // CHECK: vgetexpbf16 -512(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff] +// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff] vgetexpbf16 -512(,%rbp,2), %xmm22 // CHECK: vgetexpbf16 2032(%rcx), %xmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f] +// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x42,0x71,0x7f] vgetexpbf16 2032(%rcx), %xmm22 {%k7} {z} // CHECK: vgetexpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80] +// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x42,0x72,0x80] vgetexpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} // CHECK: vgetexpbf16 268435456(%rbp,%r14,8), %ymm22 -// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] vgetexpbf16 268435456(%rbp,%r14,8), %ymm22 // CHECK: vgetexpbf16 291(%r8,%rax,4), %ymm22 {%k7} -// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] vgetexpbf16 291(%r8,%rax,4), %ymm22 {%k7} // CHECK: vgetexpbf16 (%rip){1to16}, %ymm22 -// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x42,0x35,0x00,0x00,0x00,0x00] vgetexpbf16 (%rip){1to16}, %ymm22 // CHECK: vgetexpbf16 -1024(,%rbp,2), %ymm22 -// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff] +// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff] vgetexpbf16 -1024(,%rbp,2), %ymm22 // CHECK: vgetexpbf16 4064(%rcx), %ymm22 {%k7} {z} -// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f] +// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x42,0x71,0x7f] vgetexpbf16 4064(%rcx), %ymm22 {%k7} {z} // CHECK: 
vgetexpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} -// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80] +// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x42,0x72,0x80] vgetexpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} // CHECK: vgetexpbf16 268435456(%rbp,%r14,8), %zmm22 -// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] vgetexpbf16 268435456(%rbp,%r14,8), %zmm22 // CHECK: vgetexpbf16 291(%r8,%rax,4), %zmm22 {%k7} -// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] vgetexpbf16 291(%r8,%rax,4), %zmm22 {%k7} // CHECK: vgetexpbf16 (%rip){1to32}, %zmm22 -// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x42,0x35,0x00,0x00,0x00,0x00] vgetexpbf16 (%rip){1to32}, %zmm22 // CHECK: vgetexpbf16 -2048(,%rbp,2), %zmm22 -// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff] +// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff] vgetexpbf16 -2048(,%rbp,2), %zmm22 // CHECK: vgetexpbf16 8128(%rcx), %zmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f] +// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x42,0x71,0x7f] vgetexpbf16 8128(%rcx), %zmm22 {%k7} {z} // CHECK: vgetexpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80] +// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x42,0x72,0x80] vgetexpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} // CHECK: vgetmantbf16 $123, %zmm23, %zmm22 diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-intel.s b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s index d1727c586e240..b0787a60c7144 100644 --- a/llvm/test/MC/X86/avx10.2-bf16-64-intel.s +++ b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s @@ -1717,111 +1717,111 @@ vfpclassbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123 // CHECK: vgetexpbf16 xmm22, xmm23 
-// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x42,0xf7] vgetexpbf16 xmm22, xmm23 // CHECK: vgetexpbf16 xmm22 {k7}, xmm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x42,0xf7] vgetexpbf16 xmm22 {k7}, xmm23 // CHECK: vgetexpbf16 xmm22 {k7} {z}, xmm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x42,0xf7] vgetexpbf16 xmm22 {k7} {z}, xmm23 // CHECK: vgetexpbf16 zmm22, zmm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x42,0xf7] vgetexpbf16 zmm22, zmm23 // CHECK: vgetexpbf16 zmm22 {k7}, zmm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x42,0xf7] vgetexpbf16 zmm22 {k7}, zmm23 // CHECK: vgetexpbf16 zmm22 {k7} {z}, zmm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x42,0xf7] vgetexpbf16 zmm22 {k7} {z}, zmm23 // CHECK: vgetexpbf16 ymm22, ymm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x42,0xf7] vgetexpbf16 ymm22, ymm23 // CHECK: vgetexpbf16 ymm22 {k7}, ymm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x42,0xf7] vgetexpbf16 ymm22 {k7}, ymm23 // CHECK: vgetexpbf16 ymm22 {k7} {z}, ymm23 -// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7] +// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x42,0xf7] vgetexpbf16 ymm22 {k7} {z}, ymm23 // CHECK: vgetexpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] vgetexpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] // CHECK: vgetexpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: 
[0x62,0xc6,0x7c,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] vgetexpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] // CHECK: vgetexpbf16 xmm22, word ptr [rip]{1to8} -// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x42,0x35,0x00,0x00,0x00,0x00] vgetexpbf16 xmm22, word ptr [rip]{1to8} // CHECK: vgetexpbf16 xmm22, xmmword ptr [2*rbp - 512] -// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff] +// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff] vgetexpbf16 xmm22, xmmword ptr [2*rbp - 512] // CHECK: vgetexpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] -// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f] +// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x42,0x71,0x7f] vgetexpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] // CHECK: vgetexpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} -// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80] +// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x42,0x72,0x80] vgetexpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} // CHECK: vgetexpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] vgetexpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] // CHECK: vgetexpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] vgetexpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] // CHECK: vgetexpbf16 ymm22, word ptr [rip]{1to16} -// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x42,0x35,0x00,0x00,0x00,0x00] vgetexpbf16 ymm22, word ptr [rip]{1to16} // CHECK: vgetexpbf16 ymm22, ymmword ptr [2*rbp - 1024] -// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff] 
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff] vgetexpbf16 ymm22, ymmword ptr [2*rbp - 1024] // CHECK: vgetexpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] -// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f] +// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x42,0x71,0x7f] vgetexpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] // CHECK: vgetexpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} -// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80] +// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x42,0x72,0x80] vgetexpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} // CHECK: vgetexpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] vgetexpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] // CHECK: vgetexpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] vgetexpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] // CHECK: vgetexpbf16 zmm22, word ptr [rip]{1to32} -// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x42,0x35,0x00,0x00,0x00,0x00] vgetexpbf16 zmm22, word ptr [rip]{1to32} // CHECK: vgetexpbf16 zmm22, zmmword ptr [2*rbp - 2048] -// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff] +// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff] vgetexpbf16 zmm22, zmmword ptr [2*rbp - 2048] // CHECK: vgetexpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] -// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f] +// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x42,0x71,0x7f] vgetexpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] // CHECK: vgetexpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} -// CHECK: encoding: 
[0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80] +// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x42,0x72,0x80] vgetexpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} // CHECK: vgetmantbf16 zmm22, zmm23, 123 diff --git a/llvm/test/MC/X86/avx10.2-com-ef-32-att.s b/llvm/test/MC/X86/avx10.2-com-ef-32-att.s index 8883bb3d6775a..5f91ec8370ef1 100644 --- a/llvm/test/MC/X86/avx10.2-com-ef-32-att.s +++ b/llvm/test/MC/X86/avx10.2-com-ef-32-att.s @@ -1,194 +1,194 @@ // RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s // CHECK: vcomxsd %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0xd3] vcomxsd %xmm3, %xmm2 // CHECK: vcomxsd {sae}, %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x18,0x2f,0xd3] vcomxsd {sae}, %xmm3, %xmm2 // CHECK: vcomxsd 268435456(%esp,%esi,8), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] vcomxsd 268435456(%esp,%esi,8), %xmm2 // CHECK: vcomxsd 291(%edi,%eax,4), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] vcomxsd 291(%edi,%eax,4), %xmm2 // CHECK: vcomxsd (%eax), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x10] vcomxsd (%eax), %xmm2 // CHECK: vcomxsd -256(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff] vcomxsd -256(,%ebp,2), %xmm2 // CHECK: vcomxsd 1016(%ecx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x51,0x7f] vcomxsd 1016(%ecx), %xmm2 // CHECK: vcomxsd -1024(%edx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80] +// CHECK: encoding: 
[0x62,0xf1,0xff,0x08,0x2f,0x52,0x80] vcomxsd -1024(%edx), %xmm2 // CHECK: vcomxsh %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0xd3] vcomxsh %xmm3, %xmm2 // CHECK: vcomxsh {sae}, %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2f,0xd3] vcomxsh {sae}, %xmm3, %xmm2 // CHECK: vcomxsh 268435456(%esp,%esi,8), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] vcomxsh 268435456(%esp,%esi,8), %xmm2 // CHECK: vcomxsh 291(%edi,%eax,4), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] vcomxsh 291(%edi,%eax,4), %xmm2 // CHECK: vcomxsh (%eax), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x10] vcomxsh (%eax), %xmm2 // CHECK: vcomxsh -64(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] vcomxsh -64(,%ebp,2), %xmm2 // CHECK: vcomxsh 254(%ecx), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x51,0x7f] vcomxsh 254(%ecx), %xmm2 // CHECK: vcomxsh -256(%edx), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x52,0x80] vcomxsh -256(%edx), %xmm2 // CHECK: vcomxss %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0xd3] vcomxss %xmm3, %xmm2 // CHECK: vcomxss {sae}, %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x18,0x2f,0xd3] vcomxss {sae}, %xmm3, %xmm2 // CHECK: vcomxss 268435456(%esp,%esi,8), %xmm2 -// CHECK: 
encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] vcomxss 268435456(%esp,%esi,8), %xmm2 // CHECK: vcomxss 291(%edi,%eax,4), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] vcomxss 291(%edi,%eax,4), %xmm2 // CHECK: vcomxss (%eax), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x10] vcomxss (%eax), %xmm2 // CHECK: vcomxss -128(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff] vcomxss -128(,%ebp,2), %xmm2 // CHECK: vcomxss 508(%ecx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x51,0x7f] vcomxss 508(%ecx), %xmm2 // CHECK: vcomxss -512(%edx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x52,0x80] vcomxss -512(%edx), %xmm2 // CHECK: vucomxsd %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0xd3] vucomxsd %xmm3, %xmm2 // CHECK: vucomxsd {sae}, %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x18,0x2e,0xd3] vucomxsd {sae}, %xmm3, %xmm2 // CHECK: vucomxsd 268435456(%esp,%esi,8), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] vucomxsd 268435456(%esp,%esi,8), %xmm2 // CHECK: vucomxsd 291(%edi,%eax,4), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] vucomxsd 291(%edi,%eax,4), %xmm2 // CHECK: vucomxsd (%eax), %xmm2 -// 
CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x10] vucomxsd (%eax), %xmm2 // CHECK: vucomxsd -256(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff] vucomxsd -256(,%ebp,2), %xmm2 // CHECK: vucomxsd 1016(%ecx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x51,0x7f] vucomxsd 1016(%ecx), %xmm2 // CHECK: vucomxsd -1024(%edx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x52,0x80] vucomxsd -1024(%edx), %xmm2 // CHECK: vucomxsh %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0xd3] vucomxsh %xmm3, %xmm2 // CHECK: vucomxsh {sae}, %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2e,0xd3] vucomxsh {sae}, %xmm3, %xmm2 // CHECK: vucomxsh 268435456(%esp,%esi,8), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] vucomxsh 268435456(%esp,%esi,8), %xmm2 // CHECK: vucomxsh 291(%edi,%eax,4), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] vucomxsh 291(%edi,%eax,4), %xmm2 // CHECK: vucomxsh (%eax), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x10] vucomxsh (%eax), %xmm2 // CHECK: vucomxsh -64(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff] vucomxsh -64(,%ebp,2), %xmm2 // CHECK: vucomxsh 254(%ecx), %xmm2 -// CHECK: encoding: 
[0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x51,0x7f] vucomxsh 254(%ecx), %xmm2 // CHECK: vucomxsh -256(%edx), %xmm2 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x52,0x80] vucomxsh -256(%edx), %xmm2 // CHECK: vucomxss %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0xd3] vucomxss %xmm3, %xmm2 // CHECK: vucomxss {sae}, %xmm3, %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x18,0x2e,0xd3] vucomxss {sae}, %xmm3, %xmm2 // CHECK: vucomxss 268435456(%esp,%esi,8), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] vucomxss 268435456(%esp,%esi,8), %xmm2 // CHECK: vucomxss 291(%edi,%eax,4), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] vucomxss 291(%edi,%eax,4), %xmm2 // CHECK: vucomxss (%eax), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x10] vucomxss (%eax), %xmm2 // CHECK: vucomxss -128(,%ebp,2), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff] vucomxss -128(,%ebp,2), %xmm2 // CHECK: vucomxss 508(%ecx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x51,0x7f] vucomxss 508(%ecx), %xmm2 // CHECK: vucomxss -512(%edx), %xmm2 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x52,0x80] vucomxss -512(%edx), %xmm2 diff --git a/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s b/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s index 9ff0484db133c..7cbd4e9722ddb 100644 
--- a/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s +++ b/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s @@ -1,194 +1,194 @@ // RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s // CHECK: vcomxsd xmm2, xmm3 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0xd3] vcomxsd xmm2, xmm3 // CHECK: vcomxsd xmm2, xmm3, {sae} -// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x18,0x2f,0xd3] vcomxsd xmm2, xmm3, {sae} // CHECK: vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456] // CHECK: vcomxsd xmm2, qword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] vcomxsd xmm2, qword ptr [edi + 4*eax + 291] // CHECK: vcomxsd xmm2, qword ptr [eax] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x10] vcomxsd xmm2, qword ptr [eax] // CHECK: vcomxsd xmm2, qword ptr [2*ebp - 256] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff] vcomxsd xmm2, qword ptr [2*ebp - 256] // CHECK: vcomxsd xmm2, qword ptr [ecx + 1016] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x51,0x7f] vcomxsd xmm2, qword ptr [ecx + 1016] // CHECK: vcomxsd xmm2, qword ptr [edx - 1024] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2f,0x52,0x80] vcomxsd xmm2, qword ptr [edx - 1024] // CHECK: vcomxsh xmm2, xmm3 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0xd3] +// CHECK: encoding: 
[0x62,0xf5,0x7e,0x08,0x2f,0xd3] vcomxsh xmm2, xmm3 // CHECK: vcomxsh xmm2, xmm3, {sae} -// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2f,0xd3] vcomxsh xmm2, xmm3, {sae} // CHECK: vcomxsh xmm2, word ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] vcomxsh xmm2, word ptr [esp + 8*esi + 268435456] // CHECK: vcomxsh xmm2, word ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] vcomxsh xmm2, word ptr [edi + 4*eax + 291] // CHECK: vcomxsh xmm2, word ptr [eax] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x10] vcomxsh xmm2, word ptr [eax] // CHECK: vcomxsh xmm2, word ptr [2*ebp - 64] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] vcomxsh xmm2, word ptr [2*ebp - 64] // CHECK: vcomxsh xmm2, word ptr [ecx + 254] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x51,0x7f] vcomxsh xmm2, word ptr [ecx + 254] // CHECK: vcomxsh xmm2, word ptr [edx - 256] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2f,0x52,0x80] vcomxsh xmm2, word ptr [edx - 256] // CHECK: vcomxss xmm2, xmm3 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0xd3] vcomxss xmm2, xmm3 // CHECK: vcomxss xmm2, xmm3, {sae} -// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2f,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x18,0x2f,0xd3] vcomxss xmm2, xmm3, {sae} // CHECK: vcomxss xmm2, dword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] 
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] vcomxss xmm2, dword ptr [esp + 8*esi + 268435456] // CHECK: vcomxss xmm2, dword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] vcomxss xmm2, dword ptr [edi + 4*eax + 291] // CHECK: vcomxss xmm2, dword ptr [eax] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x10] vcomxss xmm2, dword ptr [eax] // CHECK: vcomxss xmm2, dword ptr [2*ebp - 128] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff] vcomxss xmm2, dword ptr [2*ebp - 128] // CHECK: vcomxss xmm2, dword ptr [ecx + 508] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x51,0x7f] vcomxss xmm2, dword ptr [ecx + 508] // CHECK: vcomxss xmm2, dword ptr [edx - 512] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2f,0x52,0x80] vcomxss xmm2, dword ptr [edx - 512] // CHECK: vucomxsd xmm2, xmm3 -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0xd3] vucomxsd xmm2, xmm3 // CHECK: vucomxsd xmm2, xmm3, {sae} -// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0xff,0x18,0x2e,0xd3] vucomxsd xmm2, xmm3, {sae} // CHECK: vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456] // CHECK: vucomxsd xmm2, qword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: 
[0x62,0xf1,0xff,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] vucomxsd xmm2, qword ptr [edi + 4*eax + 291] // CHECK: vucomxsd xmm2, qword ptr [eax] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x10] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x10] vucomxsd xmm2, qword ptr [eax] // CHECK: vucomxsd xmm2, qword ptr [2*ebp - 256] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff] vucomxsd xmm2, qword ptr [2*ebp - 256] // CHECK: vucomxsd xmm2, qword ptr [ecx + 1016] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x51,0x7f] vucomxsd xmm2, qword ptr [ecx + 1016] // CHECK: vucomxsd xmm2, qword ptr [edx - 1024] -// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x2e,0x52,0x80] vucomxsd xmm2, qword ptr [edx - 1024] // CHECK: vucomxsh xmm2, xmm3 -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0xd3] vucomxsh xmm2, xmm3 // CHECK: vucomxsh xmm2, xmm3, {sae} -// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2e,0xd3] vucomxsh xmm2, xmm3, {sae} // CHECK: vucomxsh xmm2, word ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] vucomxsh xmm2, word ptr [esp + 8*esi + 268435456] // CHECK: vucomxsh xmm2, word ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] vucomxsh xmm2, word ptr [edi + 4*eax + 291] // CHECK: vucomxsh xmm2, word ptr [eax] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x10] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x10] vucomxsh xmm2, word ptr [eax] // CHECK: vucomxsh xmm2, word ptr [2*ebp - 64] 
-// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff] vucomxsh xmm2, word ptr [2*ebp - 64] // CHECK: vucomxsh xmm2, word ptr [ecx + 254] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x51,0x7f] vucomxsh xmm2, word ptr [ecx + 254] // CHECK: vucomxsh xmm2, word ptr [edx - 256] -// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80] +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2e,0x52,0x80] vucomxsh xmm2, word ptr [edx - 256] // CHECK: vucomxss xmm2, xmm3 -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0xd3] vucomxss xmm2, xmm3 // CHECK: vucomxss xmm2, xmm3, {sae} -// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2e,0xd3] +// CHECK: encoding: [0x62,0xf1,0x7e,0x18,0x2e,0xd3] vucomxss xmm2, xmm3, {sae} // CHECK: vucomxss xmm2, dword ptr [esp + 8*esi + 268435456] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] vucomxss xmm2, dword ptr [esp + 8*esi + 268435456] // CHECK: vucomxss xmm2, dword ptr [edi + 4*eax + 291] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] vucomxss xmm2, dword ptr [edi + 4*eax + 291] // CHECK: vucomxss xmm2, dword ptr [eax] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x10] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x10] vucomxss xmm2, dword ptr [eax] // CHECK: vucomxss xmm2, dword ptr [2*ebp - 128] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff] vucomxss xmm2, dword ptr [2*ebp - 128] // CHECK: vucomxss xmm2, dword ptr [ecx + 508] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f] +// CHECK: encoding: 
[0x62,0xf1,0x7e,0x08,0x2e,0x51,0x7f] vucomxss xmm2, dword ptr [ecx + 508] // CHECK: vucomxss xmm2, dword ptr [edx - 512] -// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80] +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x2e,0x52,0x80] vucomxss xmm2, dword ptr [edx - 512] diff --git a/llvm/test/MC/X86/avx10.2-com-ef-64-att.s b/llvm/test/MC/X86/avx10.2-com-ef-64-att.s index 2f3690537334a..832151ab23707 100644 --- a/llvm/test/MC/X86/avx10.2-com-ef-64-att.s +++ b/llvm/test/MC/X86/avx10.2-com-ef-64-att.s @@ -1,194 +1,194 @@ // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s // CHECK: vcomxsd %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2f,0xf7] vcomxsd %xmm23, %xmm22 // CHECK: vcomxsd {sae}, %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x18,0x2f,0xf7] vcomxsd {sae}, %xmm23, %xmm22 // CHECK: vcomxsd 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] vcomxsd 268435456(%rbp,%r14,8), %xmm22 // CHECK: vcomxsd 291(%r8,%rax,4), %xmm22 -// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0xff,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] vcomxsd 291(%r8,%rax,4), %xmm22 // CHECK: vcomxsd (%rip), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] vcomxsd (%rip), %xmm22 // CHECK: vcomxsd -256(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff] vcomxsd -256(,%rbp,2), %xmm22 // CHECK: vcomxsd 1016(%rcx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x71,0x7f] vcomxsd 
1016(%rcx), %xmm22 // CHECK: vcomxsd -1024(%rdx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x72,0x80] vcomxsd -1024(%rdx), %xmm22 // CHECK: vcomxsh %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2f,0xf7] vcomxsh %xmm23, %xmm22 // CHECK: vcomxsh {sae}, %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x18,0x2f,0xf7] vcomxsh {sae}, %xmm23, %xmm22 // CHECK: vcomxsh 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] vcomxsh 268435456(%rbp,%r14,8), %xmm22 // CHECK: vcomxsh 291(%r8,%rax,4), %xmm22 -// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc5,0x7e,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] vcomxsh 291(%r8,%rax,4), %xmm22 // CHECK: vcomxsh (%rip), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] vcomxsh (%rip), %xmm22 // CHECK: vcomxsh -64(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] vcomxsh -64(,%rbp,2), %xmm22 // CHECK: vcomxsh 254(%rcx), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x71,0x7f] vcomxsh 254(%rcx), %xmm22 // CHECK: vcomxsh -256(%rdx), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x72,0x80] vcomxsh -256(%rdx), %xmm22 // CHECK: vcomxss %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2f,0xf7] vcomxss %xmm23, %xmm22 // CHECK: vcomxss {sae}, %xmm23, %xmm22 
-// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x18,0x2f,0xf7] vcomxss {sae}, %xmm23, %xmm22 // CHECK: vcomxss 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] vcomxss 268435456(%rbp,%r14,8), %xmm22 // CHECK: vcomxss 291(%r8,%rax,4), %xmm22 -// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0x7e,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] vcomxss 291(%r8,%rax,4), %xmm22 // CHECK: vcomxss (%rip), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] vcomxss (%rip), %xmm22 // CHECK: vcomxss -128(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff] vcomxss -128(,%rbp,2), %xmm22 // CHECK: vcomxss 508(%rcx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x71,0x7f] vcomxss 508(%rcx), %xmm22 // CHECK: vcomxss -512(%rdx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x72,0x80] vcomxss -512(%rdx), %xmm22 // CHECK: vucomxsd %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2e,0xf7] vucomxsd %xmm23, %xmm22 // CHECK: vucomxsd {sae}, %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x18,0x2e,0xf7] vucomxsd {sae}, %xmm23, %xmm22 // CHECK: vucomxsd 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] vucomxsd 268435456(%rbp,%r14,8), %xmm22 // CHECK: 
vucomxsd 291(%r8,%rax,4), %xmm22 -// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0xff,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] vucomxsd 291(%r8,%rax,4), %xmm22 // CHECK: vucomxsd (%rip), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] vucomxsd (%rip), %xmm22 // CHECK: vucomxsd -256(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff] vucomxsd -256(,%rbp,2), %xmm22 // CHECK: vucomxsd 1016(%rcx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x71,0x7f] vucomxsd 1016(%rcx), %xmm22 // CHECK: vucomxsd -1024(%rdx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x72,0x80] vucomxsd -1024(%rdx), %xmm22 // CHECK: vucomxsh %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2e,0xf7] vucomxsh %xmm23, %xmm22 // CHECK: vucomxsh {sae}, %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x18,0x2e,0xf7] vucomxsh {sae}, %xmm23, %xmm22 // CHECK: vucomxsh 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] vucomxsh 268435456(%rbp,%r14,8), %xmm22 // CHECK: vucomxsh 291(%r8,%rax,4), %xmm22 -// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc5,0x7e,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] vucomxsh 291(%r8,%rax,4), %xmm22 // CHECK: vucomxsh (%rip), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: 
[0x62,0xe5,0x7e,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] vucomxsh (%rip), %xmm22 // CHECK: vucomxsh -64(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff] vucomxsh -64(,%rbp,2), %xmm22 // CHECK: vucomxsh 254(%rcx), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x71,0x7f] vucomxsh 254(%rcx), %xmm22 // CHECK: vucomxsh -256(%rdx), %xmm22 -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x72,0x80] vucomxsh -256(%rdx), %xmm22 // CHECK: vucomxss %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2e,0xf7] vucomxss %xmm23, %xmm22 // CHECK: vucomxss {sae}, %xmm23, %xmm22 -// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x18,0x2e,0xf7] vucomxss {sae}, %xmm23, %xmm22 // CHECK: vucomxss 268435456(%rbp,%r14,8), %xmm22 -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] vucomxss 268435456(%rbp,%r14,8), %xmm22 // CHECK: vucomxss 291(%r8,%rax,4), %xmm22 -// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0x7e,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] vucomxss 291(%r8,%rax,4), %xmm22 // CHECK: vucomxss (%rip), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] vucomxss (%rip), %xmm22 // CHECK: vucomxss -128(,%rbp,2), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff] vucomxss -128(,%rbp,2), %xmm22 // CHECK: vucomxss 508(%rcx), %xmm22 -// CHECK: encoding: 
[0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x71,0x7f] vucomxss 508(%rcx), %xmm22 // CHECK: vucomxss -512(%rdx), %xmm22 -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x72,0x80] vucomxss -512(%rdx), %xmm22 diff --git a/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s b/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s index 41aaf99270b88..94e3b77984c83 100644 --- a/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s +++ b/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s @@ -1,194 +1,194 @@ // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s // CHECK: vcomxsd xmm22, xmm23 -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2f,0xf7] vcomxsd xmm22, xmm23 // CHECK: vcomxsd xmm22, xmm23, {sae} -// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x18,0x2f,0xf7] vcomxsd xmm22, xmm23, {sae} // CHECK: vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] // CHECK: vcomxsd xmm22, qword ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0xff,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] vcomxsd xmm22, qword ptr [r8 + 4*rax + 291] // CHECK: vcomxsd xmm22, qword ptr [rip] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] vcomxsd xmm22, qword ptr [rip] // CHECK: vcomxsd xmm22, qword ptr [2*rbp - 256] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff] vcomxsd xmm22, qword ptr [2*rbp - 256] // CHECK: 
vcomxsd xmm22, qword ptr [rcx + 1016] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x71,0x7f] vcomxsd xmm22, qword ptr [rcx + 1016] // CHECK: vcomxsd xmm22, qword ptr [rdx - 1024] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2f,0x72,0x80] vcomxsd xmm22, qword ptr [rdx - 1024] // CHECK: vcomxsh xmm22, xmm23 -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2f,0xf7] vcomxsh xmm22, xmm23 // CHECK: vcomxsh xmm22, xmm23, {sae} -// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x18,0x2f,0xf7] vcomxsh xmm22, xmm23, {sae} // CHECK: vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] // CHECK: vcomxsh xmm22, word ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc5,0x7e,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] vcomxsh xmm22, word ptr [r8 + 4*rax + 291] // CHECK: vcomxsh xmm22, word ptr [rip] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] vcomxsh xmm22, word ptr [rip] // CHECK: vcomxsh xmm22, word ptr [2*rbp - 64] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] vcomxsh xmm22, word ptr [2*rbp - 64] // CHECK: vcomxsh xmm22, word ptr [rcx + 254] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x71,0x7f] vcomxsh xmm22, word ptr [rcx + 254] // CHECK: vcomxsh xmm22, word ptr [rdx - 256] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80] 
+// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2f,0x72,0x80] vcomxsh xmm22, word ptr [rdx - 256] // CHECK: vcomxss xmm22, xmm23 -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2f,0xf7] vcomxss xmm22, xmm23 // CHECK: vcomxss xmm22, xmm23, {sae} -// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2f,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x18,0x2f,0xf7] vcomxss xmm22, xmm23, {sae} // CHECK: vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] // CHECK: vcomxss xmm22, dword ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0x7e,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] vcomxss xmm22, dword ptr [r8 + 4*rax + 291] // CHECK: vcomxss xmm22, dword ptr [rip] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] vcomxss xmm22, dword ptr [rip] // CHECK: vcomxss xmm22, dword ptr [2*rbp - 128] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff] vcomxss xmm22, dword ptr [2*rbp - 128] // CHECK: vcomxss xmm22, dword ptr [rcx + 508] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x71,0x7f] vcomxss xmm22, dword ptr [rcx + 508] // CHECK: vcomxss xmm22, dword ptr [rdx - 512] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2f,0x72,0x80] vcomxss xmm22, dword ptr [rdx - 512] // CHECK: vucomxsd xmm22, xmm23 -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2e,0xf7] vucomxsd xmm22, xmm23 // CHECK: vucomxsd xmm22, 
xmm23, {sae} -// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0xff,0x18,0x2e,0xf7] vucomxsd xmm22, xmm23, {sae} // CHECK: vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] // CHECK: vucomxsd xmm22, qword ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0xff,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] vucomxsd xmm22, qword ptr [r8 + 4*rax + 291] // CHECK: vucomxsd xmm22, qword ptr [rip] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] vucomxsd xmm22, qword ptr [rip] // CHECK: vucomxsd xmm22, qword ptr [2*rbp - 256] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff] vucomxsd xmm22, qword ptr [2*rbp - 256] // CHECK: vucomxsd xmm22, qword ptr [rcx + 1016] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x71,0x7f] vucomxsd xmm22, qword ptr [rcx + 1016] // CHECK: vucomxsd xmm22, qword ptr [rdx - 1024] -// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x2e,0x72,0x80] vucomxsd xmm22, qword ptr [rdx - 1024] // CHECK: vucomxsh xmm22, xmm23 -// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2e,0xf7] vucomxsh xmm22, xmm23 // CHECK: vucomxsh xmm22, xmm23, {sae} -// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa5,0x7e,0x18,0x2e,0xf7] vucomxsh xmm22, xmm23, {sae} // CHECK: vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: 
[0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] // CHECK: vucomxsh xmm22, word ptr [r8 + 4*rax + 291] -// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc5,0x7e,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] vucomxsh xmm22, word ptr [r8 + 4*rax + 291] // CHECK: vucomxsh xmm22, word ptr [rip] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] vucomxsh xmm22, word ptr [rip] // CHECK: vucomxsh xmm22, word ptr [2*rbp - 64] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff] vucomxsh xmm22, word ptr [2*rbp - 64] // CHECK: vucomxsh xmm22, word ptr [rcx + 254] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x71,0x7f] vucomxsh xmm22, word ptr [rcx + 254] // CHECK: vucomxsh xmm22, word ptr [rdx - 256] -// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80] +// CHECK: encoding: [0x62,0xe5,0x7e,0x08,0x2e,0x72,0x80] vucomxsh xmm22, word ptr [rdx - 256] // CHECK: vucomxss xmm22, xmm23 -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2e,0xf7] vucomxss xmm22, xmm23 // CHECK: vucomxss xmm22, xmm23, {sae} -// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2e,0xf7] +// CHECK: encoding: [0x62,0xa1,0x7e,0x18,0x2e,0xf7] vucomxss xmm22, xmm23, {sae} // CHECK: vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] -// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] // CHECK: vucomxss xmm22, dword ptr [r8 + 4*rax + 291] -// CHECK: encoding: 
[0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] +// CHECK: encoding: [0x62,0xc1,0x7e,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] vucomxss xmm22, dword ptr [r8 + 4*rax + 291] // CHECK: vucomxss xmm22, dword ptr [rip] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] vucomxss xmm22, dword ptr [rip] // CHECK: vucomxss xmm22, dword ptr [2*rbp - 128] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff] vucomxss xmm22, dword ptr [2*rbp - 128] // CHECK: vucomxss xmm22, dword ptr [rcx + 508] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x71,0x7f] vucomxss xmm22, dword ptr [rcx + 508] // CHECK: vucomxss xmm22, dword ptr [rdx - 512] -// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80] +// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x2e,0x72,0x80] vucomxss xmm22, dword ptr [rdx - 512] From 66825a89b8e0e9e1d202cb4d3824791b81afdc98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 21 Mar 2025 15:33:25 +0200 Subject: [PATCH 003/154] [LLD] [COFF] Add a few more mingw libs to skip autoexports for (#132289) "libmsvcrt-os" was added to the list of excluded libs in binutils in 9d9c67b06c1bf4c4550e3de0eb575c2bfbe96df9 in 2017. "libucrt" was added in c4a8df19ba0a82aa8dea88d9f72ed9e63cb1fa84 in 2022. "libucrtapp" isn't in the binutils exclusion list yet, but a patch for adding it has been submitted. Since 0d403d5dd13ce22c07418058f3b640708992890c in mingw-w64 (in 2020), there's such a third variant of the UCRT import library available. Since 18df3e8323dcf9fdfec56b5f12c04a9c723a0931 in 2025, "libpthread" and "libwinpthread" are also excluded. 
(cherry picked from commit af93db9344919085551fac38d6d6a4f774a7220a) --- lld/COFF/MinGW.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp index 76f5a0a7500b9..097cf228f7d6e 100644 --- a/lld/COFF/MinGW.cpp +++ b/lld/COFF/MinGW.cpp @@ -54,7 +54,12 @@ AutoExporter::AutoExporter( "libFortranDecimal", "libunwind", "libmsvcrt", + "libmsvcrt-os", "libucrtbase", + "libucrt", + "libucrtapp", + "libpthread", + "libwinpthread", }; excludeObjects = { From fcd0ad23f668bce4b3a3731c5baa115434dc3269 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 4 Mar 2025 21:46:55 +0000 Subject: [PATCH 004/154] [AArch64] Add test for scalar copysign. NFC (cherry picked from commit 4c2d1b4c53def85e16d3612b92379a347d76baf0) --- ...e-streaming-mode-fixed-length-fcopysign.ll | 228 ++++++++++++++++++ 1 file changed, 228 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index 2282e74af5d00..238c124b7cb06 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -8,6 +8,234 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" +define void @test_copysign_f16(ptr %ap, ptr %bp) { +; SVE-LABEL: test_copysign_f16: +; SVE: // %bb.0: +; SVE-NEXT: adrp x8, .LCPI0_0 +; SVE-NEXT: ldr h1, [x0] +; SVE-NEXT: ldr h2, [x1] +; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] +; SVE-NEXT: adrp x8, .LCPI0_1 +; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1] +; SVE-NEXT: mov z3.d, z0.d +; SVE-NEXT: fmov s0, s1 +; SVE-NEXT: fmov s3, s2 +; SVE-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE-NEXT: str h0, [x0] +; SVE-NEXT: ret +; +; SVE2-LABEL: test_copysign_f16: +; SVE2: // %bb.0: +; SVE2-NEXT: adrp x8, .LCPI0_0 +; SVE2-NEXT: ldr h1, [x0] +; SVE2-NEXT: ldr h2, [x1] +; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] +; SVE2-NEXT: 
adrp x8, .LCPI0_1 +; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1] +; SVE2-NEXT: mov z3.d, z0.d +; SVE2-NEXT: fmov s0, s1 +; SVE2-NEXT: fmov s3, s2 +; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE2-NEXT: str h0, [x0] +; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ldr h1, [x1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: str h1, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %a = load half, ptr %ap + %b = load half, ptr %bp + %r = call half @llvm.copysign.f16(half %a, half %b) + store half %r, ptr %ap + ret void +} + +define void @test_copysign_bf16(ptr %ap, ptr %bp) { +; SVE-LABEL: test_copysign_bf16: +; SVE: // %bb.0: +; SVE-NEXT: adrp x8, .LCPI1_0 +; SVE-NEXT: ldr h1, [x0] +; SVE-NEXT: ldr h2, [x1] +; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; SVE-NEXT: adrp x8, .LCPI1_1 +; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] +; SVE-NEXT: mov z3.d, z0.d +; SVE-NEXT: fmov s0, s1 +; SVE-NEXT: fmov s3, s2 +; SVE-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE-NEXT: str h0, [x0] +; SVE-NEXT: ret +; +; SVE2-LABEL: test_copysign_bf16: +; SVE2: // %bb.0: +; SVE2-NEXT: adrp x8, .LCPI1_0 +; SVE2-NEXT: ldr h1, [x0] +; SVE2-NEXT: ldr h2, [x1] +; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; SVE2-NEXT: adrp x8, .LCPI1_1 +; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] +; SVE2-NEXT: mov z3.d, z0.d +; SVE2-NEXT: fmov s0, s1 +; SVE2-NEXT: fmov s3, s2 +; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE2-NEXT: str h0, [x0] +; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_bf16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ldr h1, [x1] +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: str h1, [sp, #76] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: lsl w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret + %a = load bfloat, ptr %ap + %b = load bfloat, ptr %bp + %r = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b) + store bfloat %r, ptr %ap + ret void +} + +define void @test_copysign_f32(ptr %ap, ptr %bp) { +; SVE-LABEL: test_copysign_f32: +; SVE: // %bb.0: +; SVE-NEXT: adrp x8, .LCPI2_0 +; SVE-NEXT: ldr s1, [x0] +; SVE-NEXT: ldr s2, [x1] +; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; SVE-NEXT: adrp x8, .LCPI2_1 +; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1] +; SVE-NEXT: mov z3.d, z0.d +; SVE-NEXT: fmov s0, s1 +; SVE-NEXT: fmov s3, s2 +; SVE-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE-NEXT: str s0, [x0] +; SVE-NEXT: ret +; +; SVE2-LABEL: test_copysign_f32: +; SVE2: // %bb.0: +; SVE2-NEXT: adrp x8, .LCPI2_0 +; SVE2-NEXT: ldr s1, [x0] +; SVE2-NEXT: ldr s2, [x1] +; SVE2-NEXT: 
ldr q0, [x8, :lo12:.LCPI2_0] +; SVE2-NEXT: adrp x8, .LCPI2_1 +; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1] +; SVE2-NEXT: mov z3.d, z0.d +; SVE2-NEXT: fmov s0, s1 +; SVE2-NEXT: fmov s3, s2 +; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE2-NEXT: str s0, [x0] +; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ldr w8, [x1] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str s0, [x0] +; NONEON-NOSVE-NEXT: ret + %a = load float, ptr %ap + %b = load float, ptr %bp + %r = call float @llvm.copysign.f32(float %a, float %b) + store float %r, ptr %ap + ret void +} + +define void @test_copysign_f64(ptr %ap, ptr %bp) { +; SVE-LABEL: test_copysign_f64: +; SVE: // %bb.0: +; SVE-NEXT: adrp x8, .LCPI3_1 +; SVE-NEXT: ptrue p0.d, vl2 +; SVE-NEXT: ldr d2, [x0] +; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1] +; SVE-NEXT: adrp x8, .LCPI3_0 +; SVE-NEXT: ldr d3, [x1] +; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; SVE-NEXT: fneg z0.d, p0/m, z0.d +; SVE-NEXT: mov z4.d, z1.d +; SVE-NEXT: fmov d1, d2 +; SVE-NEXT: fmov d4, d3 +; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b +; SVE-NEXT: str d0, [x0] +; SVE-NEXT: ret +; +; SVE2-LABEL: test_copysign_f64: +; SVE2: // %bb.0: +; SVE2-NEXT: adrp x8, .LCPI3_1 +; SVE2-NEXT: ptrue p0.d, vl2 +; SVE2-NEXT: ldr d2, [x0] +; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1] +; SVE2-NEXT: adrp x8, .LCPI3_0 +; SVE2-NEXT: ldr d3, [x1] +; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; SVE2-NEXT: fneg z0.d, p0/m, z0.d +; SVE2-NEXT: mov z4.d, z1.d +; SVE2-NEXT: fmov d1, d2 +; SVE2-NEXT: fmov d4, d3 +; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b +; SVE2-NEXT: str d0, [x0] +; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ldr x8, [x1] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: 
tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret + %a = load double, ptr %ap + %b = load double, ptr %bp + %r = call double @llvm.copysign.f64(double %a, double %b) + store double %r, ptr %ap + ret void +} + ;============ f16 define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) { From dc7b743515d3a463465dd38a62869ab9f77704cd Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 5 Mar 2025 17:18:07 +0000 Subject: [PATCH 005/154] [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) Without this we can try to generate invalid instructions or create illegal types. This patch generates a SVE fcopysign instead and uses its lowering. BF16 is left out for the moment as it doesn't lower successfully (but could use the same code as fp16). (cherry picked from commit d4ab3df320f9eebf11cc5fb600a0919f93678abe) --- .../Target/AArch64/AArch64ISelLowering.cpp | 19 +++ ...e-streaming-mode-fixed-length-fcopysign.ll | 139 +++++++----------- 2 files changed, 74 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 16fcd589cecd1..cfd0fc32357ce 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10683,6 +10683,25 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, return convertFromScalableVector(DAG, VT, Res); } + // With SVE, but without Neon, extend the scalars to scalable vectors and use + // a SVE FCOPYSIGN.
+ if (!VT.isVector() && !Subtarget->isNeonAvailable() && + Subtarget->isSVEorStreamingSVEAvailable()) { + if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64) + return SDValue(); + EVT SVT = getPackedSVEVectorVT(VT); + + SDValue Ins1 = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In1, + DAG.getConstant(0, DL, MVT::i64)); + SDValue Ins2 = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In2, + DAG.getConstant(0, DL, MVT::i64)); + SDValue FCS = DAG.getNode(ISD::FCOPYSIGN, DL, SVT, Ins1, Ins2); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, FCS, + DAG.getConstant(0, DL, MVT::i64)); + } + auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) { if (VT.isScalableVector()) return getSVESafeBitCast(VT, Op, DAG); diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index 238c124b7cb06..79921e25caf53 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -11,32 +11,21 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_copysign_f16(ptr %ap, ptr %bp) { ; SVE-LABEL: test_copysign_f16: ; SVE: // %bb.0: -; SVE-NEXT: adrp x8, .LCPI0_0 +; SVE-NEXT: ldr h0, [x1] ; SVE-NEXT: ldr h1, [x0] -; SVE-NEXT: ldr h2, [x1] -; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] -; SVE-NEXT: adrp x8, .LCPI0_1 -; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1] -; SVE-NEXT: mov z3.d, z0.d -; SVE-NEXT: fmov s0, s1 -; SVE-NEXT: fmov s3, s2 -; SVE-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE-NEXT: and z0.h, z0.h, #0x8000 +; SVE-NEXT: and z1.h, z1.h, #0x7fff +; SVE-NEXT: orr z0.d, z1.d, z0.d ; SVE-NEXT: str h0, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: test_copysign_f16: ; SVE2: // %bb.0: -; SVE2-NEXT: adrp x8, .LCPI0_0 -; SVE2-NEXT: ldr h1, [x0] -; SVE2-NEXT: ldr h2, [x1] -; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] -; SVE2-NEXT: adrp x8, .LCPI0_1 -; 
SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1] -; SVE2-NEXT: mov z3.d, z0.d -; SVE2-NEXT: fmov s0, s1 -; SVE2-NEXT: fmov s3, s2 -; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b -; SVE2-NEXT: str h0, [x0] +; SVE2-NEXT: mov z0.h, #32767 // =0x7fff +; SVE2-NEXT: ldr h1, [x1] +; SVE2-NEXT: ldr h2, [x0] +; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d +; SVE2-NEXT: str h2, [x0] ; SVE2-NEXT: ret ; ; NONEON-NOSVE-LABEL: test_copysign_f16: @@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) { define void @test_copysign_bf16(ptr %ap, ptr %bp) { ; SVE-LABEL: test_copysign_bf16: ; SVE: // %bb.0: -; SVE-NEXT: adrp x8, .LCPI1_0 -; SVE-NEXT: ldr h1, [x0] -; SVE-NEXT: ldr h2, [x1] -; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] -; SVE-NEXT: adrp x8, .LCPI1_1 -; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] -; SVE-NEXT: mov z3.d, z0.d -; SVE-NEXT: fmov s0, s1 -; SVE-NEXT: fmov s3, s2 -; SVE-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE-NEXT: sub sp, sp, #16 +; SVE-NEXT: .cfi_def_cfa_offset 16 +; SVE-NEXT: ldr h0, [x0] +; SVE-NEXT: ldr h1, [x1] +; SVE-NEXT: fmov w8, s0 +; SVE-NEXT: str h1, [sp, #12] +; SVE-NEXT: ldrb w9, [sp, #13] +; SVE-NEXT: and w8, w8, #0x7fff +; SVE-NEXT: tst w9, #0x80 +; SVE-NEXT: fmov s0, w8 +; SVE-NEXT: eor w8, w8, #0x8000 +; SVE-NEXT: fmov s1, w8 +; SVE-NEXT: fcsel h0, h1, h0, ne ; SVE-NEXT: str h0, [x0] +; SVE-NEXT: add sp, sp, #16 ; SVE-NEXT: ret ; ; SVE2-LABEL: test_copysign_bf16: ; SVE2: // %bb.0: -; SVE2-NEXT: adrp x8, .LCPI1_0 -; SVE2-NEXT: ldr h1, [x0] -; SVE2-NEXT: ldr h2, [x1] -; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] -; SVE2-NEXT: adrp x8, .LCPI1_1 -; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] -; SVE2-NEXT: mov z3.d, z0.d -; SVE2-NEXT: fmov s0, s1 -; SVE2-NEXT: fmov s3, s2 -; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE2-NEXT: sub sp, sp, #16 +; SVE2-NEXT: .cfi_def_cfa_offset 16 +; SVE2-NEXT: ldr h0, [x0] +; SVE2-NEXT: ldr h1, [x1] +; SVE2-NEXT: fmov w8, s0 +; SVE2-NEXT: str h1, [sp, #12] +; SVE2-NEXT: ldrb w9, [sp, #13] +; SVE2-NEXT: and w8, w8, #0x7fff +; SVE2-NEXT: tst 
w9, #0x80 +; SVE2-NEXT: fmov s0, w8 +; SVE2-NEXT: eor w8, w8, #0x8000 +; SVE2-NEXT: fmov s1, w8 +; SVE2-NEXT: fcsel h0, h1, h0, ne ; SVE2-NEXT: str h0, [x0] +; SVE2-NEXT: add sp, sp, #16 ; SVE2-NEXT: ret ; ; NONEON-NOSVE-LABEL: test_copysign_bf16: @@ -139,32 +136,21 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) { define void @test_copysign_f32(ptr %ap, ptr %bp) { ; SVE-LABEL: test_copysign_f32: ; SVE: // %bb.0: -; SVE-NEXT: adrp x8, .LCPI2_0 +; SVE-NEXT: ldr s0, [x1] ; SVE-NEXT: ldr s1, [x0] -; SVE-NEXT: ldr s2, [x1] -; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] -; SVE-NEXT: adrp x8, .LCPI2_1 -; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1] -; SVE-NEXT: mov z3.d, z0.d -; SVE-NEXT: fmov s0, s1 -; SVE-NEXT: fmov s3, s2 -; SVE-NEXT: bif v0.16b, v3.16b, v4.16b +; SVE-NEXT: and z0.s, z0.s, #0x80000000 +; SVE-NEXT: and z1.s, z1.s, #0x7fffffff +; SVE-NEXT: orr z0.d, z1.d, z0.d ; SVE-NEXT: str s0, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: test_copysign_f32: ; SVE2: // %bb.0: -; SVE2-NEXT: adrp x8, .LCPI2_0 -; SVE2-NEXT: ldr s1, [x0] -; SVE2-NEXT: ldr s2, [x1] -; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] -; SVE2-NEXT: adrp x8, .LCPI2_1 -; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1] -; SVE2-NEXT: mov z3.d, z0.d -; SVE2-NEXT: fmov s0, s1 -; SVE2-NEXT: fmov s3, s2 -; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b -; SVE2-NEXT: str s0, [x0] +; SVE2-NEXT: mov z0.s, #0x7fffffff +; SVE2-NEXT: ldr s1, [x1] +; SVE2-NEXT: ldr s2, [x0] +; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d +; SVE2-NEXT: str s2, [x0] ; SVE2-NEXT: ret ; ; NONEON-NOSVE-LABEL: test_copysign_f32: @@ -187,36 +173,21 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) { define void @test_copysign_f64(ptr %ap, ptr %bp) { ; SVE-LABEL: test_copysign_f64: ; SVE: // %bb.0: -; SVE-NEXT: adrp x8, .LCPI3_1 -; SVE-NEXT: ptrue p0.d, vl2 -; SVE-NEXT: ldr d2, [x0] -; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1] -; SVE-NEXT: adrp x8, .LCPI3_0 -; SVE-NEXT: ldr d3, [x1] -; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; SVE-NEXT: fneg z0.d, p0/m, z0.d -; SVE-NEXT: mov 
z4.d, z1.d -; SVE-NEXT: fmov d1, d2 -; SVE-NEXT: fmov d4, d3 -; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b +; SVE-NEXT: ldr d0, [x1] +; SVE-NEXT: ldr d1, [x0] +; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000 +; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff +; SVE-NEXT: orr z0.d, z1.d, z0.d ; SVE-NEXT: str d0, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: test_copysign_f64: ; SVE2: // %bb.0: -; SVE2-NEXT: adrp x8, .LCPI3_1 -; SVE2-NEXT: ptrue p0.d, vl2 +; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff +; SVE2-NEXT: ldr d1, [x1] ; SVE2-NEXT: ldr d2, [x0] -; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1] -; SVE2-NEXT: adrp x8, .LCPI3_0 -; SVE2-NEXT: ldr d3, [x1] -; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; SVE2-NEXT: fneg z0.d, p0/m, z0.d -; SVE2-NEXT: mov z4.d, z1.d -; SVE2-NEXT: fmov d1, d2 -; SVE2-NEXT: fmov d4, d3 -; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b -; SVE2-NEXT: str d0, [x0] +; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d +; SVE2-NEXT: str d2, [x0] ; SVE2-NEXT: ret ; ; NONEON-NOSVE-LABEL: test_copysign_f64: From 0383020b6c1a1fdfa765050b9ef6172a422d332b Mon Sep 17 00:00:00 2001 From: Nicholas Guy Date: Wed, 19 Mar 2025 13:44:02 +0000 Subject: [PATCH 006/154] [llvm] Fix crash when complex deinterleaving operates on an unrolled loop (#129735) When attempting to perform complex deinterleaving on an unrolled loop containing a reduction, the complex deinterleaving pass would fail to accommodate the wider types when accumulating the unrolled paths. Instead of trying to alter the incoming IR to fit expectations, the pass should instead decide against processing any reduction that results in a non-complex or non-vector value. 
(cherry picked from commit 3f4b2f12a1e3e87e4bfb86937cc1ccdd4d38dcf5) --- .../lib/CodeGen/ComplexDeinterleavingPass.cpp | 11 + .../complex-deinterleaving-unrolled-cdot.ll | 191 ++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 92053ed561901..4cd378f9aa595 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -1741,6 +1741,17 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() { LLVM_DEBUG( dbgs() << "Identified single reduction starting from instruction: " << *Real << "/" << *ReductionInfo[Real].second << "\n"); + + // Reducing to a single vector is not supported, only permit reducing down + // to scalar values. + // Doing this here will leave the prior node in the graph, + // however with no uses the node will be unreachable by the replacement + // process. That along with the usage outside the graph should prevent the + // replacement process from kicking off at all for this graph. 
+ // TODO Add support for reducing to a single vector value + if (ReductionInfo[Real].second->getType()->isVectorTy()) + continue; + Processed[i] = true; auto RootNode = prepareCompositeNode( ComplexDeinterleavingOperation::ReductionSingle, Real, nullptr); diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll new file mode 100644 index 0000000000000..faefaf9bad7b1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll @@ -0,0 +1,191 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve2 -o - | FileCheck %s --check-prefix=CHECK-SVE2 +; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve -o - | FileCheck %s --check-prefix=CHECK-SVE +; RUN: opt -S --passes=complex-deinterleaving %s -o - | FileCheck %s --check-prefix=CHECK-NOSVE + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-none-unknown-elf" + +define i32 @cdotp_i8_rot0( %a0, %b0, %a1, %b1) { +; CHECK-SVE2-LABEL: define i32 @cdotp_i8_rot0( +; CHECK-SVE2-SAME: [[A0:%.*]], [[B0:%.*]], [[A1:%.*]], [[B1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SVE2-NEXT: [[ENTRY:.*]]: +; CHECK-SVE2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-SVE2: [[VECTOR_BODY]]: +; CHECK-SVE2-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE33:%.*]], %[[VECTOR_BODY]] ] +; CHECK-SVE2-NEXT: [[VEC_PHI25:%.*]] = phi [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE34:%.*]], %[[VECTOR_BODY]] ] +; CHECK-SVE2-NEXT: [[A0_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[A0]]) +; CHECK-SVE2-NEXT: [[A0_REAL:%.*]] = extractvalue { , } [[A0_DEINTERLEAVED]], 0 +; CHECK-SVE2-NEXT: [[A0_IMAG:%.*]] = extractvalue { , } [[A0_DEINTERLEAVED]], 1 +; CHECK-SVE2-NEXT: [[A1_DEINTERLEAVED:%.*]] = tail call { , } 
@llvm.vector.deinterleave2.nxv32i8( [[A1]]) +; CHECK-SVE2-NEXT: [[A1_REAL:%.*]] = extractvalue { , } [[A1_DEINTERLEAVED]], 0 +; CHECK-SVE2-NEXT: [[A1_IMAG:%.*]] = extractvalue { , } [[A1_DEINTERLEAVED]], 1 +; CHECK-SVE2-NEXT: [[A0_REAL_EXT:%.*]] = sext [[A0_REAL]] to +; CHECK-SVE2-NEXT: [[A1_REAL_EXT:%.*]] = sext [[A1_REAL]] to +; CHECK-SVE2-NEXT: [[B0_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[B0]]) +; CHECK-SVE2-NEXT: [[B0_REAL:%.*]] = extractvalue { , } [[B0_DEINTERLEAVED]], 0 +; CHECK-SVE2-NEXT: [[B0_IMAG:%.*]] = extractvalue { , } [[B0_DEINTERLEAVED]], 1 +; CHECK-SVE2-NEXT: [[B1_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[B1]]) +; CHECK-SVE2-NEXT: [[B1_REAL:%.*]] = extractvalue { , } [[B1_DEINTERLEAVED]], 0 +; CHECK-SVE2-NEXT: [[B1_IMAG:%.*]] = extractvalue { , } [[B1_DEINTERLEAVED]], 1 +; CHECK-SVE2-NEXT: [[B0_REAL_EXT:%.*]] = sext [[B0_REAL]] to +; CHECK-SVE2-NEXT: [[B1_REAL_EXT:%.*]] = sext [[B1_REAL]] to +; CHECK-SVE2-NEXT: [[TMP0:%.*]] = mul nsw [[B0_REAL_EXT]], [[A0_REAL_EXT]] +; CHECK-SVE2-NEXT: [[TMP1:%.*]] = mul nsw [[B1_REAL_EXT]], [[A1_REAL_EXT]] +; CHECK-SVE2-NEXT: [[A0_IMAG_EXT:%.*]] = sext [[A0_IMAG]] to +; CHECK-SVE2-NEXT: [[A1_IMAG_EXT:%.*]] = sext [[A1_IMAG]] to +; CHECK-SVE2-NEXT: [[B0_IMAG_EXT:%.*]] = sext [[B0_IMAG]] to +; CHECK-SVE2-NEXT: [[B1_IMAG_EXT:%.*]] = sext [[B1_IMAG]] to +; CHECK-SVE2-NEXT: [[TMP2:%.*]] = mul nsw [[B0_IMAG_EXT]], [[A0_IMAG_EXT]] +; CHECK-SVE2-NEXT: [[TMP3:%.*]] = mul nsw [[B1_IMAG_EXT]], [[A1_IMAG_EXT]] +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) +; CHECK-SVE2-NEXT: [[TMP4:%.*]] = sub nsw zeroinitializer, [[TMP2]] +; CHECK-SVE2-NEXT: [[TMP5:%.*]] = sub nsw zeroinitializer, [[TMP3]] +; CHECK-SVE2-NEXT: 
[[PARTIAL_REDUCE33]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) +; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; CHECK-SVE2: [[MIDDLE_BLOCK]]: +; CHECK-SVE2-NEXT: [[BIN_RDX:%.*]] = add [[PARTIAL_REDUCE34]], [[PARTIAL_REDUCE33]] +; CHECK-SVE2-NEXT: [[TMP23:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; CHECK-SVE2-NEXT: ret i32 [[TMP23]] +; +; CHECK-SVE-LABEL: define i32 @cdotp_i8_rot0( +; CHECK-SVE-SAME: [[A0:%.*]], [[B0:%.*]], [[A1:%.*]], [[B1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SVE-NEXT: [[ENTRY:.*]]: +; CHECK-SVE-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-SVE: [[VECTOR_BODY]]: +; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE33:%.*]], %[[VECTOR_BODY]] ] +; CHECK-SVE-NEXT: [[VEC_PHI25:%.*]] = phi [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE34:%.*]], %[[VECTOR_BODY]] ] +; CHECK-SVE-NEXT: [[A0_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[A0]]) +; CHECK-SVE-NEXT: [[A0_REAL:%.*]] = extractvalue { , } [[A0_DEINTERLEAVED]], 0 +; CHECK-SVE-NEXT: [[A0_IMAG:%.*]] = extractvalue { , } [[A0_DEINTERLEAVED]], 1 +; CHECK-SVE-NEXT: [[A1_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[A1]]) +; CHECK-SVE-NEXT: [[A1_REAL:%.*]] = extractvalue { , } [[A1_DEINTERLEAVED]], 0 +; CHECK-SVE-NEXT: [[A1_IMAG:%.*]] = extractvalue { , } [[A1_DEINTERLEAVED]], 1 +; CHECK-SVE-NEXT: [[A0_REAL_EXT:%.*]] = sext [[A0_REAL]] to +; CHECK-SVE-NEXT: [[A1_REAL_EXT:%.*]] = sext [[A1_REAL]] to +; CHECK-SVE-NEXT: [[B0_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[B0]]) +; CHECK-SVE-NEXT: [[B0_REAL:%.*]] = extractvalue { , } [[B0_DEINTERLEAVED]], 0 +; CHECK-SVE-NEXT: [[B0_IMAG:%.*]] = extractvalue { , } 
[[B0_DEINTERLEAVED]], 1 +; CHECK-SVE-NEXT: [[B1_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[B1]]) +; CHECK-SVE-NEXT: [[B1_REAL:%.*]] = extractvalue { , } [[B1_DEINTERLEAVED]], 0 +; CHECK-SVE-NEXT: [[B1_IMAG:%.*]] = extractvalue { , } [[B1_DEINTERLEAVED]], 1 +; CHECK-SVE-NEXT: [[B0_REAL_EXT:%.*]] = sext [[B0_REAL]] to +; CHECK-SVE-NEXT: [[B1_REAL_EXT:%.*]] = sext [[B1_REAL]] to +; CHECK-SVE-NEXT: [[TMP0:%.*]] = mul nsw [[B0_REAL_EXT]], [[A0_REAL_EXT]] +; CHECK-SVE-NEXT: [[TMP1:%.*]] = mul nsw [[B1_REAL_EXT]], [[A1_REAL_EXT]] +; CHECK-SVE-NEXT: [[A0_IMAG_EXT:%.*]] = sext [[A0_IMAG]] to +; CHECK-SVE-NEXT: [[A1_IMAG_EXT:%.*]] = sext [[A1_IMAG]] to +; CHECK-SVE-NEXT: [[B0_IMAG_EXT:%.*]] = sext [[B0_IMAG]] to +; CHECK-SVE-NEXT: [[B1_IMAG_EXT:%.*]] = sext [[B1_IMAG]] to +; CHECK-SVE-NEXT: [[TMP2:%.*]] = mul nsw [[B0_IMAG_EXT]], [[A0_IMAG_EXT]] +; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nsw [[B1_IMAG_EXT]], [[A1_IMAG_EXT]] +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) +; CHECK-SVE-NEXT: [[TMP4:%.*]] = sub nsw zeroinitializer, [[TMP2]] +; CHECK-SVE-NEXT: [[TMP5:%.*]] = sub nsw zeroinitializer, [[TMP3]] +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) +; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; CHECK-SVE: [[MIDDLE_BLOCK]]: +; CHECK-SVE-NEXT: [[BIN_RDX:%.*]] = add [[PARTIAL_REDUCE34]], [[PARTIAL_REDUCE33]] +; CHECK-SVE-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; CHECK-SVE-NEXT: ret i32 [[TMP6]] +; 
+; CHECK-NOSVE-LABEL: define i32 @cdotp_i8_rot0( +; CHECK-NOSVE-SAME: [[A0:%.*]], [[B0:%.*]], [[A1:%.*]], [[B1:%.*]]) { +; CHECK-NOSVE-NEXT: [[ENTRY:.*]]: +; CHECK-NOSVE-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOSVE: [[VECTOR_BODY]]: +; CHECK-NOSVE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE33:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOSVE-NEXT: [[VEC_PHI25:%.*]] = phi [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE34:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOSVE-NEXT: [[A0_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[A0]]) +; CHECK-NOSVE-NEXT: [[A0_REAL:%.*]] = extractvalue { , } [[A0_DEINTERLEAVED]], 0 +; CHECK-NOSVE-NEXT: [[A0_IMAG:%.*]] = extractvalue { , } [[A0_DEINTERLEAVED]], 1 +; CHECK-NOSVE-NEXT: [[A1_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[A1]]) +; CHECK-NOSVE-NEXT: [[A1_REAL:%.*]] = extractvalue { , } [[A1_DEINTERLEAVED]], 0 +; CHECK-NOSVE-NEXT: [[A1_IMAG:%.*]] = extractvalue { , } [[A1_DEINTERLEAVED]], 1 +; CHECK-NOSVE-NEXT: [[A0_REAL_EXT:%.*]] = sext [[A0_REAL]] to +; CHECK-NOSVE-NEXT: [[A1_REAL_EXT:%.*]] = sext [[A1_REAL]] to +; CHECK-NOSVE-NEXT: [[B0_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[B0]]) +; CHECK-NOSVE-NEXT: [[B0_REAL:%.*]] = extractvalue { , } [[B0_DEINTERLEAVED]], 0 +; CHECK-NOSVE-NEXT: [[B0_IMAG:%.*]] = extractvalue { , } [[B0_DEINTERLEAVED]], 1 +; CHECK-NOSVE-NEXT: [[B1_DEINTERLEAVED:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv32i8( [[B1]]) +; CHECK-NOSVE-NEXT: [[B1_REAL:%.*]] = extractvalue { , } [[B1_DEINTERLEAVED]], 0 +; CHECK-NOSVE-NEXT: [[B1_IMAG:%.*]] = extractvalue { , } [[B1_DEINTERLEAVED]], 1 +; CHECK-NOSVE-NEXT: [[B0_REAL_EXT:%.*]] = sext [[B0_REAL]] to +; CHECK-NOSVE-NEXT: [[B1_REAL_EXT:%.*]] = sext [[B1_REAL]] to +; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = mul nsw [[B0_REAL_EXT]], [[A0_REAL_EXT]] +; CHECK-NOSVE-NEXT: [[TMP1:%.*]] = mul nsw [[B1_REAL_EXT]], [[A1_REAL_EXT]] +; 
CHECK-NOSVE-NEXT: [[A0_IMAG_EXT:%.*]] = sext [[A0_IMAG]] to +; CHECK-NOSVE-NEXT: [[A1_IMAG_EXT:%.*]] = sext [[A1_IMAG]] to +; CHECK-NOSVE-NEXT: [[B0_IMAG_EXT:%.*]] = sext [[B0_IMAG]] to +; CHECK-NOSVE-NEXT: [[B1_IMAG_EXT:%.*]] = sext [[B1_IMAG]] to +; CHECK-NOSVE-NEXT: [[TMP2:%.*]] = mul nsw [[B0_IMAG_EXT]], [[A0_IMAG_EXT]] +; CHECK-NOSVE-NEXT: [[TMP3:%.*]] = mul nsw [[B1_IMAG_EXT]], [[A1_IMAG_EXT]] +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) +; CHECK-NOSVE-NEXT: [[TMP4:%.*]] = sub nsw zeroinitializer, [[TMP2]] +; CHECK-NOSVE-NEXT: [[TMP5:%.*]] = sub nsw zeroinitializer, [[TMP3]] +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) +; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; CHECK-NOSVE: [[MIDDLE_BLOCK]]: +; CHECK-NOSVE-NEXT: [[BIN_RDX:%.*]] = add [[PARTIAL_REDUCE34]], [[PARTIAL_REDUCE33]] +; CHECK-NOSVE-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; CHECK-NOSVE-NEXT: ret i32 [[TMP6]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %vec.phi = phi [ zeroinitializer, %entry ], [ %partial.reduce33, %vector.body ] + %vec.phi25 = phi [ zeroinitializer, %entry ], [ %partial.reduce34, %vector.body ] + %a0.deinterleaved = tail call { , } @llvm.vector.deinterleave2.nxv32i8( %a0) + %a0.real = extractvalue { , } %a0.deinterleaved, 0 + %a0.imag = extractvalue { , } %a0.deinterleaved, 1 + %a1.deinterleaved = tail call { , } @llvm.vector.deinterleave2.nxv32i8( %a1) + %a1.real = 
extractvalue { , } %a1.deinterleaved, 0 + %a1.imag = extractvalue { , } %a1.deinterleaved, 1 + %a0.real.ext = sext %a0.real to + %a1.real.ext = sext %a1.real to + %b0.deinterleaved = tail call { , } @llvm.vector.deinterleave2.nxv32i8( %b0) + %b0.real = extractvalue { , } %b0.deinterleaved, 0 + %b0.imag = extractvalue { , } %b0.deinterleaved, 1 + %b1.deinterleaved = tail call { , } @llvm.vector.deinterleave2.nxv32i8( %b1) + %b1.real = extractvalue { , } %b1.deinterleaved, 0 + %b1.imag = extractvalue { , } %b1.deinterleaved, 1 + %b0.real.ext = sext %b0.real to + %b1.real.ext = sext %b1.real to + %18 = mul nsw %b0.real.ext, %a0.real.ext + %19 = mul nsw %b1.real.ext, %a1.real.ext + %a0.imag.ext = sext %a0.imag to + %a1.imag.ext = sext %a1.imag to + %b0.imag.ext = sext %b0.imag to + %b1.imag.ext = sext %b1.imag to + %24 = mul nsw %b0.imag.ext, %a0.imag.ext + %25 = mul nsw %b1.imag.ext, %a1.imag.ext + %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %18) + %partial.reduce32 = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi25, %19) + %26 = sub nsw zeroinitializer, %24 + %27 = sub nsw zeroinitializer, %25 + %partial.reduce33 = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %partial.reduce, %26) + %partial.reduce34 = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %partial.reduce32, %27) + br i1 true, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %bin.rdx = add %partial.reduce34, %partial.reduce33 + %29 = tail call i32 @llvm.vector.reduce.add.nxv4i32( %bin.rdx) + ret i32 %29 +} + +declare @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32(, ) +declare @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(, ) +declare @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i32(, ) + +declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>) 
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) + +declare i32 @llvm.vector.reduce.add.nxv4i32() +declare i64 @llvm.vector.reduce.add.nxv2i64() From 2198410a8a8a0723dc744af5a2b7239ee5722662 Mon Sep 17 00:00:00 2001 From: David Tellenbach Date: Mon, 17 Mar 2025 17:23:58 -0700 Subject: [PATCH 007/154] [compiler-rt][Darwin][x86] Fix instrprof-darwin-exports test (#131425) ld64 issues a warning about section alignment which was counted as an unexpected exported symbol and the test failed. Fixed by disabling all linker warnings using -Wl,-w. (cherry picked from commit 94426df66a8d7c2321f9e197e5ef9636b0d5ce70) --- compiler-rt/test/profile/instrprof-darwin-exports.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/profile/instrprof-darwin-exports.c b/compiler-rt/test/profile/instrprof-darwin-exports.c index 079d5d28ed24d..1a2ac8c813272 100644 --- a/compiler-rt/test/profile/instrprof-darwin-exports.c +++ b/compiler-rt/test/profile/instrprof-darwin-exports.c @@ -7,13 +7,13 @@ // just "_main" produces no warnings or errors. // // RUN: echo "_main" > %t.exports -// RUN: %clang_pgogen -Werror -Wl,-exported_symbols_list,%t.exports -o %t %s 2>&1 | tee %t.log -// RUN: %clang_profgen -Werror -fcoverage-mapping -Wl,-exported_symbols_list,%t.exports -o %t %s 2>&1 | tee -a %t.log +// RUN: %clang_pgogen -Werror -Wl,-exported_symbols_list,%t.exports -Wl,-w -o %t %s 2>&1 | tee %t.log +// RUN: %clang_profgen -Werror -fcoverage-mapping -Wl,-exported_symbols_list,%t.exports -Wl,-w -o %t %s 2>&1 | tee -a %t.log // RUN: cat %t.log | count 0 // 2) Ditto (1), but for GCOV. 
// -// RUN: %clang -Werror -Wl,-exported_symbols_list,%t.exports --coverage -o %t.gcov %s | tee -a %t.gcov.log +// RUN: %clang -Werror -Wl,-exported_symbols_list,%t.exports -Wl,-w --coverage -o %t.gcov %s | tee -a %t.gcov.log // RUN: cat %t.gcov.log | count 0 // 3) The default set of weak external symbols should match the set of symbols From e0e8071815c76b935c2145699c72bf833f0d0af5 Mon Sep 17 00:00:00 2001 From: Alexey Karyakin Date: Tue, 11 Mar 2025 10:41:01 -0500 Subject: [PATCH 008/154] [hexagon] Prevent alignment search beyond a label (#130631) When searching for packets to .align, don't consider ones which would require padding beyond a label. There are two problems with padding beyond a label: - the distance between labels may increase for some offsets to become too large; - u/sleb128 values that encode a difference will not be updated because they are computed before the align command is handled. This is more a short-term fix/hack. The proper solution would be to unify `.align` and `.falign` handling and move it to the layout loop. 
(cherry picked from commit 1fe463182cead6e7c6119ab410eae9e9d969325a) --- .../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 18 ++++++++++++++++++ llvm/test/MC/Hexagon/align-leb128.s | 18 ++++++++++++++++++ llvm/test/MC/Hexagon/align.s | 13 +++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 llvm/test/MC/Hexagon/align-leb128.s diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 98b1dde8fa3fc..725067e0c9bdd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -728,6 +728,24 @@ class HexagonAsmBackend : public MCAsmBackend { MCContext &Context = Asm.getContext(); auto &RF = cast(*Frags[K]); auto &Inst = const_cast(RF.getInst()); + + const bool WouldTraverseLabel = llvm::any_of( + Asm.symbols(), [&Asm, &RF, &Inst](MCSymbol const &sym) { + uint64_t Offset = 0; + const bool HasOffset = Asm.getSymbolOffset(sym, Offset); + const unsigned PacketSizeBytes = + HexagonMCInstrInfo::bundleSize(Inst) * + HEXAGON_INSTR_SIZE; + const bool OffsetPastSym = + Offset <= (Asm.getFragmentOffset(RF) + PacketSizeBytes); + return !sym.isVariable() && Offset != 0 && HasOffset && + OffsetPastSym; + }); + if (WouldTraverseLabel) { + Size = 0; + break; + } + while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) { MCInst *Nop = Context.createMCInst(); diff --git a/llvm/test/MC/Hexagon/align-leb128.s b/llvm/test/MC/Hexagon/align-leb128.s new file mode 100644 index 0000000000000..77018f0114311 --- /dev/null +++ b/llvm/test/MC/Hexagon/align-leb128.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc -triple=hexagon -filetype=obj %s | llvm-readelf -x .data - \ +# RUN: | FileCheck %s --match-full-lines + +# Illustrate the case when padding packets across labels also breaks leb128 +# relocations. This happens because .align padding is inserted once at the +# very end of the section layout. 
+L1: + nop +L2: +.size L1, L2-L1 +.align 16 + nop +.data +.word L2-L1 +.uleb128 L2-L1 + +# CHECK: Hex dump of section '.data': +# CHECK-NEXT: 0x00000000 04000000 04 ..... diff --git a/llvm/test/MC/Hexagon/align.s b/llvm/test/MC/Hexagon/align.s index 9c2978df71373..e17d09cfd8c96 100644 --- a/llvm/test/MC/Hexagon/align.s +++ b/llvm/test/MC/Hexagon/align.s @@ -58,3 +58,16 @@ r0 = vextract(v0, r0) r1 = sub (##1, r1) } .align 16 { r0 = sub (#1, r0) } + +# Don't search backwards to pad packets beyond a label: +{ r1 = add(r1, r0) } +# CHECK-NEXT: { r1 = add(r1,r0) +# CHECK-NOT: nop + +post_label: +.align 16 +# CHECK-LABEL: post_label +# CHECK-NEXT: { nop +# CHECK-NEXT: nop } +# CHECK-NEXT: { r1 = sub(#1,r1) } +{ r1 = sub(#1, r1) } From 95763651e25c6d31a6f41c28e7a22e9203a4dba9 Mon Sep 17 00:00:00 2001 From: Abinaya Saravanan Date: Thu, 13 Mar 2025 03:28:26 +0530 Subject: [PATCH 009/154] [HEXAGON] Add support to lower "FREEZE a half(f16)" instruction on Hexagon and fix the isel-buildvector-v2f16.ll assertion (#130977) (cherry picked from commit 9c65e6ac115a7d8566c874537791125c3ace7c1a) --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 + .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 22 +++++----- llvm/test/CodeGen/Hexagon/fp16-promote.ll | 44 +++++++++++++++++++ 3 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/fp16-promote.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index aaa9c65c1e07e..4df88b3a8abd7 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -362,6 +362,7 @@ class HexagonTargetLowering : public TargetLowering { shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { return AtomicExpansionKind::LLSC; } + bool softPromoteHalfType() const override { return true; } private: void initializeHVXLowering(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp 
b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 1a19e81a68f08..a7eb20a3e5ff9 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1618,17 +1618,6 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) for (unsigned i = 0; i != Size; ++i) Ops.push_back(Op.getOperand(i)); - // First, split the BUILD_VECTOR for vector pairs. We could generate - // some pairs directly (via splat), but splats should be generated - // by the combiner prior to getting here. - if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { - ArrayRef A(Ops); - MVT SingleTy = typeSplit(VecTy).first; - SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); - SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); - } - if (VecTy.getVectorElementType() == MVT::i1) return buildHvxVectorPred(Ops, dl, VecTy, DAG); @@ -1645,6 +1634,17 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); } + // First, split the BUILD_VECTOR for vector pairs. We could generate + // some pairs directly (via splat), but splats should be generated + // by the combiner prior to getting here. 
+ if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) { + ArrayRef A(Ops); + MVT SingleTy = typeSplit(VecTy).first; + SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG); + SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); + } + return buildHvxVectorReg(Ops, dl, VecTy, DAG); } diff --git a/llvm/test/CodeGen/Hexagon/fp16-promote.ll b/llvm/test/CodeGen/Hexagon/fp16-promote.ll new file mode 100644 index 0000000000000..1ef0a133ce30a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/fp16-promote.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define half @freeze_half_undef() nounwind { +; CHECK-LABEL: freeze_half_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: allocframe(#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: r0 = sfadd(r0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } + %y1 = freeze half undef + %t1 = fadd half %y1, %y1 + ret half %t1 +} + +define half @freeze_half_poison(half %maybe.poison) { +; CHECK-LABEL: freeze_half_poison: +; CHECK: // %bb.0: +; CHECK: { +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: r0 = sfadd(r0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } + %y1 = freeze half %maybe.poison + %t1 = fadd half %y1, %y1 + ret half %t1 +} From 6034661369c40b82538f44e6d742a7120ec418c4 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 21 Mar 2025 10:55:50 +0800 Subject: [PATCH 010/154] [LoongArch] Pre-commit test for fixing tls-le symbol type The symbol type 
of tls-le must be `TLS`, it was incorrectly set as `NOTYPE`. A later commit will fix it. (cherry picked from commit 87adafcd2e248fa69d1f776a9e60f95df03b885d) --- .../CodeGen/LoongArch/fix-tle-le-sym-type.ll | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/fix-tle-le-sym-type.ll diff --git a/llvm/test/CodeGen/LoongArch/fix-tle-le-sym-type.ll b/llvm/test/CodeGen/LoongArch/fix-tle-le-sym-type.ll new file mode 100644 index 0000000000000..fe5f2195f0dc7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fix-tle-le-sym-type.ll @@ -0,0 +1,24 @@ +; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32 +; RUN: llvm-readelf -s %t-la32 | FileCheck %s --check-prefix=LA32 + +; RUN: llc --mtriple=loongarch64 --filetype=obj %s -o %t-la64 +; RUN: llvm-readelf -s %t-la64 | FileCheck %s --check-prefix=LA64 + +; LA32: Symbol table '.symtab' contains [[#]] entries: +; LA32-NEXT: Num: Value Size Type Bind Vis Ndx Name +; LA32: 00000000 0 NOTYPE GLOBAL DEFAULT UND tls_sym + +; LA64: Symbol table '.symtab' contains [[#]] entries: +; LA64-NEXT: Num: Value Size Type Bind Vis Ndx Name +; LA64: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND tls_sym + +@tls_sym = external thread_local(localexec) global i32 + +define dso_local signext i32 @test_tlsle() nounwind { +entry: + %0 = call ptr @llvm.threadlocal.address.p0(ptr @tls_sym) + %1 = load i32, ptr %0 + ret i32 %1 +} + +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) From a311bc81d957c74739f39889329d13161673a696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 20 Feb 2025 21:49:19 -0100 Subject: [PATCH 011/154] [llvm-dlltool] Implement the --identify option (#127465) This option prints the name of the DLL that gets imported, when linking against an import library. This is implemented using the same strategy as GNU dlltool does; looking for the contents of .idata$6 or .idata$7 chunks. 
The right section name to check for is chosen by identifying whether the library is GNU or LLVM style. In the case of GNU import libraries, the DLL name is in an .idata$7 chunk. However there are also other chunks with that section name (for entries for the IAT or ILT); identify these by looking for whether a chunk contains relocations. Alternatively, one could also just look for .idata$2 chunks, look for relocations at the right offset, and locate data at the symbol that the relocation points at (which may be in the same or in another object file). (cherry picked from commit dcc08a17c781a5066ab17b9791e1c455f7cedbf7) --- .../llvm-dlltool/DlltoolDriver.cpp | 146 +++++++++++++++++- llvm/lib/ToolDrivers/llvm-dlltool/Options.td | 5 + .../llvm-dlltool/Inputs/gnu_foo_lib_h.yaml | 133 ++++++++++++++++ .../Inputs/gnu_foo_lib_s00000.yaml | 116 ++++++++++++++ .../llvm-dlltool/Inputs/gnu_foo_lib_t.yaml | 119 ++++++++++++++ .../llvm-dlltool/Inputs/llvm_foo_dll_1.yaml | 69 +++++++++ .../llvm-dlltool/Inputs/llvm_foo_dll_2.yaml | 18 +++ .../llvm-dlltool/Inputs/llvm_foo_dll_3.yaml | 23 +++ llvm/test/tools/llvm-dlltool/identify.test | 69 +++++++++ 9 files changed, 697 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_h.yaml create mode 100644 llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_s00000.yaml create mode 100644 llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_t.yaml create mode 100644 llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_1.yaml create mode 100644 llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_2.yaml create mode 100644 llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_3.yaml create mode 100644 llvm/test/tools/llvm-dlltool/identify.test diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 1782e24287860..380fbd8b6fc6c 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -12,6 
+12,7 @@ #include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFFModuleDefinition.h" @@ -158,6 +159,143 @@ bool parseModuleDefinition(StringRef DefFileName, MachineTypes Machine, return true; } +int printError(llvm::Error E, Twine File) { + if (!E) + return 0; + handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) { + llvm::errs() << "error opening " << File << ": " << EIB.message() << "\n"; + }); + return 1; +} + +template +int forEachCoff(object::Archive &Archive, StringRef Name, Callable Callback) { + Error Err = Error::success(); + for (auto &C : Archive.children(Err)) { + Expected NameOrErr = C.getName(); + if (!NameOrErr) + return printError(NameOrErr.takeError(), Name); + StringRef Name = *NameOrErr; + + Expected ChildMB = C.getMemoryBufferRef(); + if (!ChildMB) + return printError(ChildMB.takeError(), Name); + + if (identify_magic(ChildMB->getBuffer()) == file_magic::coff_object) { + auto Obj = object::COFFObjectFile::create(*ChildMB); + if (!Obj) + return printError(Obj.takeError(), Name); + if (!Callback(*Obj->get(), Name)) + return 1; + } + } + if (Err) + return printError(std::move(Err), Name); + return 0; +} + +// To find the name of the imported DLL from an import library, we can either +// inspect the object files that form the import table entries, or we could +// just look at the archive member names, for MSVC style import libraries. +// Looking at the archive member names doesn't work for GNU style import +// libraries though, while inspecting the import table entries works for +// both. (MSVC style import libraries contain a couple regular object files +// for the header/trailers.) +// +// This implementation does the same as GNU dlltool does; look at the +// content of ".idata$7" sections, or for MSVC style libraries, look +// at ".idata$6" sections.
+// +// For GNU style import libraries, there are also other data chunks in sections +// named ".idata$7" (entries to the IAT or ILT); these are distinguished +// by seeing that they contain relocations. (They also look like an empty +// string when looking for null termination.) +// +// Alternatively, we could do things differently - look for any .idata$2 +// section; this would be import directory entries. At offset 0xc in them +// there is the RVA of the import DLL name; look for a relocation at this +// spot and locate the symbol that it points at. That symbol may either +// be within the same object file (in the case of MSVC style import libraries) +// or another object file (in the case of GNU import libraries). +bool identifyImportName(const COFFObjectFile &Obj, StringRef ObjName, + std::vector &Names, bool IsMsStyleImplib) { + StringRef TargetName = IsMsStyleImplib ? ".idata$6" : ".idata$7"; + for (const auto &S : Obj.sections()) { + Expected NameOrErr = S.getName(); + if (!NameOrErr) { + printError(NameOrErr.takeError(), ObjName); + return false; + } + StringRef Name = *NameOrErr; + if (Name != TargetName) + continue; + + // GNU import libraries contain .idata$7 section in the per function + // objects too, but they contain relocations. 
+ if (!IsMsStyleImplib && !S.relocations().empty()) + continue; + + Expected ContentsOrErr = S.getContents(); + if (!ContentsOrErr) { + printError(ContentsOrErr.takeError(), ObjName); + return false; + } + StringRef Contents = *ContentsOrErr; + Contents = Contents.substr(0, Contents.find('\0')); + if (Contents.empty()) + continue; + Names.push_back(Contents); + return true; + } + return true; +} + +int doIdentify(StringRef File, bool IdentifyStrict) { + ErrorOr> MaybeBuf = MemoryBuffer::getFile( + File, /*IsText=*/false, /*RequiredNullTerminator=*/false); + if (!MaybeBuf) + return printError(errorCodeToError(MaybeBuf.getError()), File); + if (identify_magic(MaybeBuf.get()->getBuffer()) != file_magic::archive) { + llvm::errs() << File << " is not a library\n"; + return 1; + } + + std::unique_ptr B = std::move(MaybeBuf.get()); + Error Err = Error::success(); + object::Archive Archive(B->getMemBufferRef(), Err); + if (Err) + return printError(std::move(Err), B->getBufferIdentifier()); + + bool IsMsStyleImplib = false; + for (const auto &S : Archive.symbols()) { + if (S.getName() == "__NULL_IMPORT_DESCRIPTOR") { + IsMsStyleImplib = true; + break; + } + } + std::vector Names; + if (forEachCoff(Archive, B->getBufferIdentifier(), + [&](const COFFObjectFile &Obj, StringRef ObjName) -> bool { + return identifyImportName(Obj, ObjName, Names, + IsMsStyleImplib); + })) + return 1; + + if (Names.empty()) { + llvm::errs() << "No DLL import name found in " << File << "\n"; + return 1; + } + if (Names.size() > 1 && IdentifyStrict) { + llvm::errs() << File << " contains imports for two or more DLLs\n"; + return 1; + } + + for (StringRef S : Names) + llvm::outs() << S << "\n"; + + return 0; +} + } // namespace int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { @@ -173,7 +311,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { // Handle when no input or output is specified if (Args.hasArgNoClaim(OPT_INPUT) || - (!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) { +
(!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l) && + !Args.hasArgNoClaim(OPT_I))) { Table.printHelp(outs(), "llvm-dlltool [options] file...", "llvm-dlltool", false); llvm::outs() @@ -185,6 +324,11 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { llvm::errs() << "ignoring unknown argument: " << Arg->getAsString(Args) << "\n"; + if (Args.hasArg(OPT_I)) { + return doIdentify(Args.getLastArg(OPT_I)->getValue(), + Args.hasArg(OPT_identify_strict)); + } + if (!Args.hasArg(OPT_d)) { llvm::errs() << "no definition file specified\n"; return 1; diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/Options.td b/llvm/lib/ToolDrivers/llvm-dlltool/Options.td index 7810694c98e36..4fd80189aff29 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/Options.td +++ b/llvm/lib/ToolDrivers/llvm-dlltool/Options.td @@ -21,6 +21,11 @@ def k_alias: Flag<["--"], "kill-at">, Alias; def no_leading_underscore: Flag<["--"], "no-leading-underscore">, HelpText<"Don't add leading underscores on symbols">; +def I: JoinedOrSeparate<["-"], "I">, HelpText<"Identify DLL name from import library">; +def I_long : JoinedOrSeparate<["--"], "identify">, Alias; + +def identify_strict : Flag<["--"], "identify-strict">, HelpText<"Error out if the --identify option detects more than one DLL">; + //============================================================================== // The flags below do nothing. They are defined only for dlltool compatibility. 
//============================================================================== diff --git a/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_h.yaml b/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_h.yaml new file mode 100644 index 0000000000000..26f3493d62143 --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_h.yaml @@ -0,0 +1,133 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '' + - Name: .data + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: .bss + Characteristics: [ IMAGE_SCN_CNT_UNINITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: '.idata$2' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '0000000000000000000000000000000000000000' + SizeOfRawData: 20 + Relocations: + - VirtualAddress: 0 + SymbolName: '.idata$4' + Type: IMAGE_REL_I386_DIR32NB + - VirtualAddress: 12 + SymbolName: __foo_lib_iname + Type: IMAGE_REL_I386_DIR32NB + - VirtualAddress: 16 + SymbolName: '.idata$5' + Type: IMAGE_REL_I386_DIR32NB + - Name: '.idata$5' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: '.idata$4' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' +symbols: + - Name: .file + Value: 0 + SectionNumber: -2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_FILE + File: fake + - Name: hname + Value: 0 + SectionNumber: 6 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: 
IMAGE_SYM_CLASS_STATIC + - Name: fthunk + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .data + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .bss + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.idata$2' + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 20 + NumberOfRelocations: 3 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.idata$4' + Value: 0 + SectionNumber: 6 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.idata$5' + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: __head_foo_lib + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: __foo_lib_iname + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... 
diff --git a/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_s00000.yaml b/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_s00000.yaml new file mode 100644 index 0000000000000..f09437fc99255 --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_s00000.yaml @@ -0,0 +1,116 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: FF25000000009090 + SizeOfRawData: 8 + Relocations: + - VirtualAddress: 2 + SymbolName: '.idata$5' + Type: IMAGE_REL_I386_DIR32 + - Name: .data + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: .bss + Characteristics: [ IMAGE_SCN_CNT_UNINITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: '.idata$7' + Characteristics: [ IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '00000000' + SizeOfRawData: 4 + Relocations: + - VirtualAddress: 0 + SymbolName: __head_foo_lib + Type: IMAGE_REL_I386_DIR32NB + - Name: '.idata$5' + Characteristics: [ IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '00000000' + SizeOfRawData: 4 + Relocations: + - VirtualAddress: 0 + SymbolName: '.idata$6' + Type: IMAGE_REL_I386_DIR32NB + - Name: '.idata$4' + Characteristics: [ IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '00000000' + SizeOfRawData: 4 + Relocations: + - VirtualAddress: 0 + SymbolName: '.idata$6' + Type: IMAGE_REL_I386_DIR32NB + - Name: '.idata$6' + Characteristics: [ IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 2 + SectionData: '010066756E633100' + SizeOfRawData: 8 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: 
IMAGE_SYM_CLASS_STATIC + - Name: .data + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: .bss + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.idata$7' + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.idata$5' + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.idata$4' + Value: 0 + SectionNumber: 6 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.idata$6' + Value: 0 + SectionNumber: 7 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: _func1 + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: __imp__func1 + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: __head_foo_lib + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... 
diff --git a/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_t.yaml b/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_t.yaml new file mode 100644 index 0000000000000..e4465293bec1a --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/Inputs/gnu_foo_lib_t.yaml @@ -0,0 +1,119 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_RELOCS_STRIPPED, IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '' + - Name: .data + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: .bss + Characteristics: [ IMAGE_SCN_CNT_UNINITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '' + - Name: '.idata$4' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '00000000' + SizeOfRawData: 4 + - Name: '.idata$5' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '00000000' + SizeOfRawData: 4 + - Name: '.idata$7' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: 666F6F2E646C6C00 + SizeOfRawData: 8 +symbols: + - Name: .file + Value: 0 + SectionNumber: -2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_FILE + File: fake + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .data + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC 
+ SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .bss + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.idata$4' + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 4 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.idata$5' + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 4 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.idata$7' + Value: 0 + SectionNumber: 6 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: __foo_lib_iname + Value: 0 + SectionNumber: 6 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... 
diff --git a/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_1.yaml b/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_1.yaml new file mode 100644 index 0000000000000..f3f669d63bcad --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_1.yaml @@ -0,0 +1,69 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.idata$2' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '0000000000000000000000000000000000000000' + SizeOfRawData: 20 + Relocations: + - VirtualAddress: 12 + SymbolName: '.idata$6' + Type: IMAGE_REL_AMD64_ADDR32NB + - VirtualAddress: 0 + SymbolName: '.idata$4' + Type: IMAGE_REL_AMD64_ADDR32NB + - VirtualAddress: 16 + SymbolName: '.idata$5' + Type: IMAGE_REL_AMD64_ADDR32NB + - Name: '.idata$6' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 2 + SectionData: 666F6F2E646C6C00 + SizeOfRawData: 8 +symbols: + - Name: __IMPORT_DESCRIPTOR_foo + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: '.idata$2' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_SECTION + - Name: '.idata$6' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.idata$4' + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_SECTION + - Name: '.idata$5' + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_SECTION + - Name: __NULL_IMPORT_DESCRIPTOR + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: 
IMAGE_SYM_CLASS_EXTERNAL + - Name: "foo_NULL_THUNK_DATA" + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_2.yaml b/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_2.yaml new file mode 100644 index 0000000000000..26b601fb74c54 --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_2.yaml @@ -0,0 +1,18 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.idata$3' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: '0000000000000000000000000000000000000000' + SizeOfRawData: 20 +symbols: + - Name: __NULL_IMPORT_DESCRIPTOR + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_3.yaml b/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_3.yaml new file mode 100644 index 0000000000000..68248597cbaeb --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/Inputs/llvm_foo_dll_3.yaml @@ -0,0 +1,23 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.idata$5' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 8 + SectionData: '0000000000000000' + SizeOfRawData: 8 + - Name: '.idata$4' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 8 + SectionData: '0000000000000000' + SizeOfRawData: 8 +symbols: + - Name: "foo_NULL_THUNK_DATA" + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... 
diff --git a/llvm/test/tools/llvm-dlltool/identify.test b/llvm/test/tools/llvm-dlltool/identify.test new file mode 100644 index 0000000000000..eb2792a8e41ae --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/identify.test @@ -0,0 +1,69 @@ +Test the -I / --identify option. + +Test with both GNU style and LLVM style import libraries; using +sources from yaml to preserve the checking behaviour even if the +output of llvm-dlltool itself would change. + +RUN: rm -rf %t && mkdir -p %t +RUN: split-file %s %t + +RUN: yaml2obj %S/Inputs/gnu_foo_lib_h.yaml > %t/gnu_foo_lib_h.o +RUN: yaml2obj %S/Inputs/gnu_foo_lib_s00000.yaml > %t/gnu_foo_lib_s00000.o +RUN: yaml2obj %S/Inputs/gnu_foo_lib_t.yaml > %t/gnu_foo_lib_t.o +RUN: llvm-ar rcs %t/gnu.a %t/gnu_foo_lib_h.o %t/gnu_foo_lib_s00000.o %t/gnu_foo_lib_t.o + +RUN: yaml2obj %S/Inputs/llvm_foo_dll_1.yaml > %t/llvm_foo_dll_1.o +RUN: yaml2obj %S/Inputs/llvm_foo_dll_2.yaml > %t/llvm_foo_dll_2.o +RUN: yaml2obj %S/Inputs/llvm_foo_dll_3.yaml > %t/llvm_foo_dll_3.o +RUN: llvm-ar rcs %t/llvm.a %t/llvm_foo_dll_1.o %t/llvm_foo_dll_2.o %t/llvm_foo_dll_3.o + + +Check that we can identify the DLL name from a GNU style import library. + +RUN: llvm-dlltool -I %t/gnu.a | FileCheck --check-prefix=FOO %s +RUN: llvm-dlltool --identify %t/gnu.a | count 1 + +FOO: foo.dll + + +Check that we can successfully run identify while passing the +--identify-strict option. + +RUN: llvm-dlltool -I %t/gnu.a --identify-strict | FileCheck --check-prefix=FOO %s + + +Check that we can identify the DLL name from an LLVM style import library. + +RUN: llvm-dlltool -I %t/llvm.a | FileCheck --check-prefix=FOO %s +RUN: llvm-dlltool -I %t/llvm.a | count 1 + + +Check that we can identify the DLL names from an import library that +contains imports for multiple DLLs. 
+ +RUN: llvm-dlltool -m i386:x86-64 -d %t/lib1.def -l %t/lib1.a +RUN: llvm-dlltool -m i386:x86-64 -d %t/lib2.def -l %t/lib2.a +RUN: llvm-ar qcsL %t/merged.a %t/lib1.a %t/lib2.a + +RUN: llvm-dlltool -I %t/merged.a | FileCheck --check-prefix=MERGED %s + +MERGED-DAG: lib1.dll +MERGED-DAG: lib2.dll + +Check that --identify-strict fails this case, when there are multiple +outputs. + +RUN: not llvm-dlltool -I %t/merged.a --identify-strict 2>&1 | FileCheck --check-prefix=ERROR %s + +ERROR: contains imports for two or more DLLs + + +#--- lib1.def +LIBRARY lib1.dll +EXPORTS + func1 + +#--- lib2.def +LIBRARY lib2.dll +EXPORTS + func2 From f7b6f23c6bb7a1276b8de569014672b7d29c2b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 21 Feb 2025 01:20:29 +0200 Subject: [PATCH 012/154] [llvm-dlltool] Add a missing dependency This was missed in dcc08a17c781a5066ab17b9791e1c455f7cedbf7. (cherry picked from commit 1ca93b15482d3bfa1560b35960ab46fea65b3074) --- llvm/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt b/llvm/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt index 855ae5f048ff7..5db08e7852d03 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt +++ b/llvm/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMDlltoolDriver DlltoolDriver.cpp LINK_COMPONENTS + BinaryFormat Object Option Support From c86df914dee1b48ff54dda0eaec87f59bad069bb Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Sat, 22 Mar 2025 22:55:58 +0800 Subject: [PATCH 013/154] release/20.x: [Clang] Fix various bugs in alias CTAD transform This patch cherry-picks 032ad590d6, 868c89ff0 and 38d71c9bd onto the 20 release branch. The first patch addresses recently surfaced CTAD problems, which we believe it would be nice to roll out the fix quickly, given the release window is not closed yet. 
The second patch is a follow-up to the first and fixed a test failure on the arm32 platform. The third patch follows up on the previous patch that I cherry-picked to the 20 release branch, which removes an unnecessary assertion. --- clang/docs/ReleaseNotes.rst | 2 + clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 28 ++-- clang/lib/Sema/SemaTemplateInstantiate.cpp | 46 ++++-- clang/lib/Sema/TreeTransform.h | 84 ++++++----- clang/test/SemaCXX/ctad.cpp | 132 +++++++++++++++++- clang/test/SemaTemplate/deduction-guide.cpp | 48 +++++++ 6 files changed, 283 insertions(+), 57 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 03b68271b7864..955325026f369 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1060,6 +1060,8 @@ Bug Fixes to C++ Support - Clang is now better at keeping track of friend function template instance contexts. (#GH55509) - Fixed an integer overflow bug in computing template parameter depths when synthesizing CTAD guides. (#GH128691) - Fixed an incorrect pointer access when checking access-control on concepts. (#GH131530) +- Fixed various alias CTAD bugs involving variadic template arguments. (#GH123591), (#GH127539), (#GH129077), + (#GH129620), and (#GH129998). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp index b424de9c8a945..6728857edc6d8 100644 --- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp +++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp @@ -377,12 +377,10 @@ struct ConvertConstructorToDeductionGuideTransform { if (NestedPattern) Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); auto [Depth, Index] = getDepthAndIndex(Param); - // Depth can still be 0 if FTD belongs to an explicit class template - // specialization with an empty template parameter list. In that case, - // we don't want the NewDepth to overflow, and it should remain 0. 
- assert(Depth || - cast(FTD->getDeclContext()) - ->isExplicitSpecialization()); + // Depth can be 0 if FTD belongs to a non-template class/a class + // template specialization with an empty template parameter list. In + // that case, we don't want the NewDepth to overflow, and it should + // remain 0. NamedDecl *NewParam = transformTemplateParameter( SemaRef, DC, Param, Args, Index + Depth1IndexAdjustment, Depth ? Depth - 1 : 0); @@ -989,6 +987,19 @@ getRHSTemplateDeclAndArgs(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate) { return {Template, AliasRhsTemplateArgs}; } +bool IsNonDeducedArgument(const TemplateArgument &TA) { + // The following cases indicate the template argument is non-deducible: + // 1. The result is null. E.g. When it comes from a default template + // argument that doesn't appear in the alias declaration. + // 2. The template parameter is a pack and that cannot be deduced from + // the arguments within the alias declaration. + // Non-deducible template parameters will persist in the transformed + // deduction guide. + return TA.isNull() || + (TA.getKind() == TemplateArgument::Pack && + llvm::any_of(TA.pack_elements(), IsNonDeducedArgument)); +} + // Build deduction guides for a type alias template from the given underlying // deduction guide F. FunctionTemplateDecl * @@ -1057,7 +1068,8 @@ BuildDeductionGuideForTypeAlias(Sema &SemaRef, // !!NOTE: DeduceResults respects the sequence of template parameters of // the deduction guide f. 
for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { - if (const auto &D = DeduceResults[Index]; !D.isNull()) // Deduced + const auto &D = DeduceResults[Index]; + if (!IsNonDeducedArgument(D)) DeducedArgs.push_back(D); else NonDeducedTemplateParamsInFIndex.push_back(Index); @@ -1121,7 +1133,7 @@ BuildDeductionGuideForTypeAlias(Sema &SemaRef, Args.addOuterTemplateArguments(TransformedDeducedAliasArgs); for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { const auto &D = DeduceResults[Index]; - if (D.isNull()) { + if (IsNonDeducedArgument(D)) { // 2): Non-deduced template parameters would be substituted later. continue; } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index cf29d8a101b43..73567f3be814d 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1347,6 +1347,16 @@ std::optional Sema::isSFINAEContext() const { return std::nullopt; } +static TemplateArgument +getPackSubstitutedTemplateArgument(Sema &S, TemplateArgument Arg) { + assert(S.ArgumentPackSubstitutionIndex >= 0); + assert(S.ArgumentPackSubstitutionIndex < (int)Arg.pack_size()); + Arg = Arg.pack_begin()[S.ArgumentPackSubstitutionIndex]; + if (Arg.isPackExpansion()) + Arg = Arg.getPackExpansionPattern(); + return Arg; +} + //===----------------------------------------------------------------------===/ // Template Instantiation for Types //===----------------------------------------------------------------------===/ @@ -1466,11 +1476,13 @@ namespace { } } - static TemplateArgument + TemplateArgument getTemplateArgumentPackPatternForRewrite(const TemplateArgument &TA) { if (TA.getKind() != TemplateArgument::Pack) return TA; - assert(TA.pack_size() == 1 && + if (SemaRef.ArgumentPackSubstitutionIndex != -1) + return getPackSubstitutedTemplateArgument(SemaRef, TA); + assert(TA.pack_size() == 1 && TA.pack_begin()->isPackExpansion() && "unexpected pack arguments in template 
rewrite"); TemplateArgument Arg = *TA.pack_begin(); if (Arg.isPackExpansion()) @@ -1629,6 +1641,9 @@ namespace { std::vector TArgs; switch (Arg.getKind()) { case TemplateArgument::Pack: + assert(SemaRef.CodeSynthesisContexts.empty() || + SemaRef.CodeSynthesisContexts.back().Kind == + Sema::CodeSynthesisContext::BuildingDeductionGuides); // Literally rewrite the template argument pack, instead of unpacking // it. for (auto &pack : Arg.getPackAsArray()) { @@ -1649,6 +1664,23 @@ namespace { return inherited::TransformTemplateArgument(Input, Output, Uneval); } + std::optional ComputeSizeOfPackExprWithoutSubstitution( + ArrayRef PackArgs) { + // Don't do this when rewriting template parameters for CTAD: + // 1) The heuristic needs the unpacked Subst* nodes to figure out the + // expanded size, but this never applies since Subst* nodes are not + // created in rewrite scenarios. + // + // 2) The heuristic substitutes into the pattern with pack expansion + // suppressed, which does not meet the requirements for argument + // rewriting when template arguments include a non-pack matching against + // a pack, particularly when rewriting an alias CTAD. 
+ if (TemplateArgs.isRewrite()) + return std::nullopt; + + return inherited::ComputeSizeOfPackExprWithoutSubstitution(PackArgs); + } + template QualType TransformFunctionProtoType(TypeLocBuilder &TLB, FunctionProtoTypeLoc TL, @@ -1867,16 +1899,6 @@ bool TemplateInstantiator::AlreadyTransformed(QualType T) { return true; } -static TemplateArgument -getPackSubstitutedTemplateArgument(Sema &S, TemplateArgument Arg) { - assert(S.ArgumentPackSubstitutionIndex >= 0); - assert(S.ArgumentPackSubstitutionIndex < (int)Arg.pack_size()); - Arg = Arg.pack_begin()[S.ArgumentPackSubstitutionIndex]; - if (Arg.isPackExpansion()) - Arg = Arg.getPackExpansionPattern(); - return Arg; -} - Decl *TemplateInstantiator::TransformDecl(SourceLocation Loc, Decl *D) { if (!D) return nullptr; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 2a5e354ff716a..3e8f0ec485e9b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3660,6 +3660,9 @@ class TreeTransform { return SemaRef.BuildCXXNoexceptExpr(Range.getBegin(), Arg, Range.getEnd()); } + std::optional + ComputeSizeOfPackExprWithoutSubstitution(ArrayRef PackArgs); + /// Build a new expression to compute the length of a parameter pack. ExprResult RebuildSizeOfPackExpr(SourceLocation OperatorLoc, NamedDecl *Pack, SourceLocation PackLoc, @@ -15877,6 +15880,49 @@ TreeTransform::TransformPackExpansionExpr(PackExpansionExpr *E) { E->getNumExpansions()); } +template +std::optional +TreeTransform::ComputeSizeOfPackExprWithoutSubstitution( + ArrayRef PackArgs) { + std::optional Result = 0; + for (const TemplateArgument &Arg : PackArgs) { + if (!Arg.isPackExpansion()) { + Result = *Result + 1; + continue; + } + + TemplateArgumentLoc ArgLoc; + InventTemplateArgumentLoc(Arg, ArgLoc); + + // Find the pattern of the pack expansion. 
+ SourceLocation Ellipsis; + std::optional OrigNumExpansions; + TemplateArgumentLoc Pattern = + getSema().getTemplateArgumentPackExpansionPattern(ArgLoc, Ellipsis, + OrigNumExpansions); + + // Substitute under the pack expansion. Do not expand the pack (yet). + TemplateArgumentLoc OutPattern; + Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1); + if (getDerived().TransformTemplateArgument(Pattern, OutPattern, + /*Uneval*/ true)) + return true; + + // See if we can determine the number of arguments from the result. + std::optional NumExpansions = + getSema().getFullyPackExpandedSize(OutPattern.getArgument()); + if (!NumExpansions) { + // No: we must be in an alias template expansion, and we're going to + // need to actually expand the packs. + Result = std::nullopt; + break; + } + + Result = *Result + *NumExpansions; + } + return Result; +} + template ExprResult TreeTransform::TransformSizeOfPackExpr(SizeOfPackExpr *E) { @@ -15942,42 +15988,8 @@ TreeTransform::TransformSizeOfPackExpr(SizeOfPackExpr *E) { } // Try to compute the result without performing a partial substitution. - std::optional Result = 0; - for (const TemplateArgument &Arg : PackArgs) { - if (!Arg.isPackExpansion()) { - Result = *Result + 1; - continue; - } - - TemplateArgumentLoc ArgLoc; - InventTemplateArgumentLoc(Arg, ArgLoc); - - // Find the pattern of the pack expansion. - SourceLocation Ellipsis; - std::optional OrigNumExpansions; - TemplateArgumentLoc Pattern = - getSema().getTemplateArgumentPackExpansionPattern(ArgLoc, Ellipsis, - OrigNumExpansions); - - // Substitute under the pack expansion. Do not expand the pack (yet). - TemplateArgumentLoc OutPattern; - Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1); - if (getDerived().TransformTemplateArgument(Pattern, OutPattern, - /*Uneval*/ true)) - return true; - - // See if we can determine the number of arguments from the result. 
- std::optional NumExpansions = - getSema().getFullyPackExpandedSize(OutPattern.getArgument()); - if (!NumExpansions) { - // No: we must be in an alias template expansion, and we're going to need - // to actually expand the packs. - Result = std::nullopt; - break; - } - - Result = *Result + *NumExpansions; - } + std::optional Result = + getDerived().ComputeSizeOfPackExprWithoutSubstitution(PackArgs); // Common case: we could determine the number of expansions without // substituting. diff --git a/clang/test/SemaCXX/ctad.cpp b/clang/test/SemaCXX/ctad.cpp index 10806f107b4ee..00a861d0f567c 100644 --- a/clang/test/SemaCXX/ctad.cpp +++ b/clang/test/SemaCXX/ctad.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wno-unused-value -std=c++20 %s -// expected-no-diagnostics namespace GH64347 { @@ -17,3 +16,134 @@ void k() { } } // namespace GH64347 + +namespace GH123591 { + + +template < typename... _Types > +struct variant { + template + variant(_Types...); +}; + +template +using AstNode = variant; + +AstNode tree(42, 43, 44); + +} + +namespace GH123591_2 { + +template +using enable_if_t = char; + +template < typename... Types > +struct variant { + template < enable_if_t> + variant(); +}; + +template +using AstNode = variant<>; +// expected-note@-1 {{couldn't infer template argument ''}} \ +// expected-note@-1 2{{implicit deduction guide declared as}} \ +// expected-note@-1 {{candidate function template not viable}} + + +AstNode tree; // expected-error {{no viable constructor or deduction guide}} + +} + +namespace GH127539 { + +template +struct A { + template + A(ArgTs...) {} +}; + +template +A(ArgTs...) -> A; + +template +using AA = A; + +AA a{}; + +} + +namespace GH129077 { + +using size_t = decltype(sizeof(0)); + +struct index_type +{ + size_t value = 0; + index_type() = default; + constexpr index_type(size_t i) noexcept : value(i) {} +}; + +template +struct extents +{ + constexpr extents(decltype(Extents)...) noexcept {} +}; + +template +extents(Extents...) 
-> extents<(requires { Extents::value; } ? Extents{} : ~0ull)...>; + +template +using index = extents; + +int main() +{ + extents i{0,0}; + auto j = extents<64,{}>({}, 42); + + index k{0,0}; + auto l = index<64,{}>({}, 42); + + return 0; +} + +} + +namespace GH129620 { + +template +struct A { + constexpr A(Ts...) {} +}; + +template +using Foo = A; + +template +using Bar = Foo; + +Bar a{0, 0}; + +} + +namespace GH129998 { + +struct converible_to_one { + constexpr operator int() const noexcept { return 1; } +}; + +template +struct class_template { + class_template() = default; + constexpr class_template(auto&&...) noexcept {} +}; + +template +class_template(Extents...) -> class_template<(true ? 0 : +Extents{})...>; + +template +using alias_template = class_template; + +alias_template var2{converible_to_one{}, 2}; + +} diff --git a/clang/test/SemaTemplate/deduction-guide.cpp b/clang/test/SemaTemplate/deduction-guide.cpp index ecd152abebd74..6db132ca37c7e 100644 --- a/clang/test/SemaTemplate/deduction-guide.cpp +++ b/clang/test/SemaTemplate/deduction-guide.cpp @@ -723,3 +723,51 @@ void test() { NewDeleteAllocator abc(42); } // expected-error {{no viable constr // CHECK-NEXT: `-ParmVarDecl {{.+}} 'T' } // namespace GH128691 + +namespace GH132616_DeductionGuide { + +template struct A { + template + A(U); +}; + +template +struct B : A { + using A::A; +}; + +template +B(T) -> B; + +B b(24); + +// CHECK-LABEL: Dumping GH132616_DeductionGuide::: +// CHECK-NEXT: FunctionTemplateDecl {{.+}} implicit +// CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} typename depth 0 index 0 +// CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} class depth 0 index 1 U +// CHECK-NEXT: `-CXXDeductionGuideDecl {{.+}} implicit 'auto (U) -> B' +// CHECK-NEXT: `-ParmVarDecl {{.+}} 'U' + +struct C { + template + C(U); +}; + +template +struct D : C { + using C::C; +}; + +template +D(T) -> D; + +D d(24); + +// CHECK-LABEL: Dumping GH132616_DeductionGuide::: +// CHECK-NEXT: FunctionTemplateDecl {{.+}} implicit +// 
CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} typename depth 0 index 0 +// CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} class depth 0 index 1 U +// CHECK-NEXT: `-CXXDeductionGuideDecl {{.+}} implicit 'auto (U) -> D' +// CHECK-NEXT: `-ParmVarDecl {{.+}} 'U' + +} // namespace GH132616_DeductionGuide From ecde8c235e5e09ff71789725c96416f8daf93cd7 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Sat, 8 Mar 2025 20:32:14 -0300 Subject: [PATCH 014/154] [clang] fix matching of nested template template parameters When checking the template template parameters of template template parameters, the PartialOrdering context was not correctly propagated. This also has a few drive-by fixes, such as checking the template parameter lists of template template parameters, which was previously missing and would have been its own bug, but we need to fix it in order to prevent crashes in error recovery in a simple way. Fixes #130362 Backport of: https://github.com/llvm/llvm-project/pull/130447 --- clang/docs/ReleaseNotes.rst | 3 ++ clang/include/clang/Sema/Sema.h | 8 +++-- clang/lib/Sema/SemaDecl.cpp | 2 +- clang/lib/Sema/SemaDeclCXX.cpp | 2 +- clang/lib/Sema/SemaTemplate.cpp | 36 ++++++++++++------- clang/lib/Sema/SemaTemplateDeduction.cpp | 16 +++++---- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 2 +- clang/test/SemaTemplate/cwg2398.cpp | 22 ++++++------ .../SemaTemplate/temp_arg_template_p0522.cpp | 3 +- clang/unittests/AST/DeclPrinterTest.cpp | 16 ++++----- 10 files changed, 64 insertions(+), 46 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 955325026f369..c921ac3518f01 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1058,6 +1058,9 @@ Bug Fixes to C++ Support - Fixed a substitution bug in transforming CTAD aliases when the type alias contains a non-pack template argument corresponding to a pack parameter (#GH124715) - Clang is now better at keeping track of friend function template instance contexts. 
(#GH55509) +- Fixes matching of nested template template parameters. (#GH130362) +- Correctly diagnoses template template parameters which have a pack parameter + not in the last position. - Fixed an integer overflow bug in computing template parameter depths when synthesizing CTAD guides. (#GH128691) - Fixed an incorrect pointer access when checking access-control on concepts. (#GH131530) - Fixed various alias CTAD bugs involving variadic template arguments. (#GH123591), (#GH127539), (#GH129077), diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index cecf5cff332f4..d8cc0171c22c6 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11279,14 +11279,16 @@ class Sema final : public SemaBase { /// The context in which we are checking a template parameter list. enum TemplateParamListContext { - TPC_ClassTemplate, - TPC_VarTemplate, + // For this context, Class, Variable, TypeAlias, and non-pack Template + // Template Parameters are treated uniformly. + TPC_Other, + TPC_FunctionTemplate, TPC_ClassTemplateMember, TPC_FriendClassTemplate, TPC_FriendFunctionTemplate, TPC_FriendFunctionTemplateDefinition, - TPC_TypeAliasTemplate + TPC_TemplateTemplateParameterPack, }; /// Checks the validity of a template parameter list, possibly diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f70401ea33b4a..41d5f9f2f3420 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8145,7 +8145,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( (D.getCXXScopeSpec().isSet() && DC && DC->isRecord() && DC->isDependentContext()) ? 
TPC_ClassTemplateMember - : TPC_VarTemplate)) + : TPC_Other)) NewVD->setInvalidDecl(); // If we are providing an explicit specialization of a static variable diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index e4e3bbad1f520..85de46c9adab4 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13533,7 +13533,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, AccessSpecifier AS, // Merge any previous default template arguments into our parameters, // and check the parameter list. if (CheckTemplateParameterList(TemplateParams, OldTemplateParams, - TPC_TypeAliasTemplate)) + TPC_Other)) return nullptr; TypeAliasTemplateDecl *NewDecl = diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 938671055333c..1c555b38277b0 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1591,8 +1591,16 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter( assert(S->isTemplateParamScope() && "Template template parameter not in template parameter scope!"); - // Construct the parameter object. bool IsParameterPack = EllipsisLoc.isValid(); + + bool Invalid = false; + if (CheckTemplateParameterList( + Params, + /*OldParams=*/nullptr, + IsParameterPack ? TPC_TemplateTemplateParameterPack : TPC_Other)) + Invalid = true; + + // Construct the parameter object. TemplateTemplateParmDecl *Param = TemplateTemplateParmDecl::Create( Context, Context.getTranslationUnitDecl(), NameLoc.isInvalid() ? 
TmpLoc : NameLoc, Depth, Position, IsParameterPack, @@ -1615,9 +1623,12 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter( if (Params->size() == 0) { Diag(Param->getLocation(), diag::err_template_template_parm_no_parms) << SourceRange(Params->getLAngleLoc(), Params->getRAngleLoc()); - Param->setInvalidDecl(); + Invalid = true; } + if (Invalid) + Param->setInvalidDecl(); + // C++0x [temp.param]p9: // A default template-argument may be specified for any kind of // template-parameter that is not a template parameter pack. @@ -2066,7 +2077,7 @@ DeclResult Sema::CheckClassTemplate( SemanticContext->isDependentContext()) ? TPC_ClassTemplateMember : TUK == TagUseKind::Friend ? TPC_FriendClassTemplate - : TPC_ClassTemplate, + : TPC_Other, SkipBody)) Invalid = true; @@ -2208,9 +2219,8 @@ static bool DiagnoseDefaultTemplateArgument(Sema &S, SourceLocation ParamLoc, SourceRange DefArgRange) { switch (TPC) { - case Sema::TPC_ClassTemplate: - case Sema::TPC_VarTemplate: - case Sema::TPC_TypeAliasTemplate: + case Sema::TPC_Other: + case Sema::TPC_TemplateTemplateParameterPack: return false; case Sema::TPC_FunctionTemplate: @@ -2383,8 +2393,11 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, MissingDefaultArg = true; } else if (NonTypeTemplateParmDecl *NewNonTypeParm = dyn_cast(*NewParam)) { - // Check for unexpanded parameter packs. - if (!NewNonTypeParm->isParameterPack() && + // Check for unexpanded parameter packs, except in a template template + // parameter pack, as in those any unexpanded packs should be expanded + // along with the parameter itself. 
+ if (TPC != TPC_TemplateTemplateParameterPack && + !NewNonTypeParm->isParameterPack() && DiagnoseUnexpandedParameterPack(NewNonTypeParm->getLocation(), NewNonTypeParm->getTypeSourceInfo(), UPPC_NonTypeTemplateParameterType)) { @@ -2492,8 +2505,7 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, // If a template parameter of a primary class template or alias template // is a template parameter pack, it shall be the last template parameter. if (SawParameterPack && (NewParam + 1) != NewParamEnd && - (TPC == TPC_ClassTemplate || TPC == TPC_VarTemplate || - TPC == TPC_TypeAliasTemplate)) { + (TPC == TPC_Other || TPC == TPC_TemplateTemplateParameterPack)) { Diag((*NewParam)->getLocation(), diag::err_template_param_pack_must_be_last_template_parameter); Invalid = true; @@ -2526,8 +2538,8 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, << PrevModuleName; Invalid = true; } else if (MissingDefaultArg && - (TPC == TPC_ClassTemplate || TPC == TPC_FriendClassTemplate || - TPC == TPC_VarTemplate || TPC == TPC_TypeAliasTemplate)) { + (TPC == TPC_Other || TPC == TPC_TemplateTemplateParameterPack || + TPC == TPC_FriendClassTemplate)) { // C++ 23[temp.param]p14: // If a template-parameter of a class template, variable template, or // alias template has a default template argument, each subsequent diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 5304b5a2155b4..7a880505a53ff 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -3427,9 +3427,9 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( if (!P.isPackExpansion() && !A.isPackExpansion()) { Info.Param = makeTemplateParameter(Template->getTemplateParameters()->getParam( - (PsStack.empty() ? TemplateArgs.end() - : PsStack.front().begin()) - - TemplateArgs.begin())); + (AsStack.empty() ? 
CTAI.CanonicalConverted.end() + : AsStack.front().begin()) - + 1 - CTAI.CanonicalConverted.begin())); Info.FirstArg = P; Info.SecondArg = A; return TemplateDeductionResult::NonDeducedMismatch; @@ -6625,17 +6625,19 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( TemplateDeductionResult TDK; runWithSufficientStackSpace(Info.getLocation(), [&] { - TDK = ::FinishTemplateArgumentDeduction( - *this, AArg, /*IsPartialOrdering=*/true, PArgs, Deduced, Info); + TDK = ::FinishTemplateArgumentDeduction(*this, AArg, PartialOrdering, PArgs, + Deduced, Info); }); switch (TDK) { case TemplateDeductionResult::Success: return true; // It doesn't seem possible to get a non-deduced mismatch when partial - // ordering TTPs. + // ordering TTPs, except with an invalid template parameter list which has + // a parameter after a pack. case TemplateDeductionResult::NonDeducedMismatch: - llvm_unreachable("Unexpected NonDeducedMismatch"); + assert(PArg->isInvalidDecl() && "Unexpected NonDeducedMismatch"); + return false; // Substitution failures should have already been diagnosed. case TemplateDeductionResult::AlreadyDiagnosed: diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 89ad2a0a9b7bb..0c25b87439a95 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1827,7 +1827,7 @@ Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) { // Do some additional validation, then merge default arguments // from the existing declarations. 
if (SemaRef.CheckTemplateParameterList(InstParams, PrevParams, - Sema::TPC_ClassTemplate)) + Sema::TPC_Other)) return nullptr; Inst->setAccess(PrevClassTemplate->getAccess()); diff --git a/clang/test/SemaTemplate/cwg2398.cpp b/clang/test/SemaTemplate/cwg2398.cpp index 8592be469bb50..33b288acce82a 100644 --- a/clang/test/SemaTemplate/cwg2398.cpp +++ b/clang/test/SemaTemplate/cwg2398.cpp @@ -650,6 +650,11 @@ namespace regression3 { template struct A>; // old-error@-1 {{different template}} } // namespace regression3 +namespace GH130362 { + template