Skip to content

[clang][x86] Add initial constexpr support for VPOPCNTDQ intrinsics #118017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 28, 2024

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Nov 28, 2024

Still working on how to make __builtin_ia32_select* intrinsic wrappers constexpr

Still working on how to make __builtin_ia32_select* intrinsic wrappers constexpr
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Nov 28, 2024
@llvmbot
Copy link
Member

llvmbot commented Nov 28, 2024

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Still working on how to make __builtin_ia32_select* intrinsic wrappers constexpr


Full diff: https://github.com/llvm/llvm-project/pull/118017.diff

5 Files Affected:

  • (modified) clang/lib/Headers/avx512vpopcntdqintrin.h (+10-2)
  • (modified) clang/lib/Headers/avx512vpopcntdqvlintrin.h (+12-4)
  • (modified) clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c (+8)
  • (modified) clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c (+15)
  • (modified) clang/test/CodeGen/X86/builtin_test_helpers.h (+20)
diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h
index 7a77cbc2421b30..e24c2c5e1bcd9f 100644
--- a/clang/lib/Headers/avx512vpopcntdqintrin.h
+++ b/clang/lib/Headers/avx512vpopcntdqintrin.h
@@ -21,7 +21,14 @@
                  __target__("avx512vpopcntdq,evex512"),                        \
                  __min_vector_width__(512)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_popcnt_epi64(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v8du)__A);
 }
 
@@ -36,7 +43,8 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
   return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_popcnt_epi32(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v16su)__A);
 }
 
diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h
index b049a57a4e2a59..b6c819b0cb85e2 100644
--- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h
+++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h
@@ -25,7 +25,15 @@
                  __target__("avx512vpopcntdq,avx512vl,no-evex512"),            \
                  __min_vector_width__(256)))
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_popcnt_epi64(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v2du)__A);
 }
@@ -41,7 +49,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
   return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_popcnt_epi32(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v4su)__A);
 }
@@ -57,7 +65,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
   return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_popcnt_epi64(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v4du)__A);
 }
@@ -73,7 +81,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
   return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_popcnt_epi32(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v8su)__A);
 }
diff --git a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
index 78e73947b92c46..ca8f5e482cc561 100644
--- a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
@@ -1,35 +1,43 @@
 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m512i test_mm512_popcnt_epi64(__m512i __A) {
   // CHECK-LABEL: @test_mm512_popcnt_epi64
   // CHECK: @llvm.ctpop.v8i64
   return _mm512_popcnt_epi64(__A);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_popcnt_epi64((__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, 1, 0, 24, 1, 25));
+
 __m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_popcnt_epi64
   // CHECK: @llvm.ctpop.v8i64
   // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_popcnt_epi64(__W, __U, __A);
 }
+
 __m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_popcnt_epi64
   // CHECK: @llvm.ctpop.v8i64
   // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_popcnt_epi64(__U, __A);
 }
+
 __m512i test_mm512_popcnt_epi32(__m512i __A) {
   // CHECK-LABEL: @test_mm512_popcnt_epi32
   // CHECK: @llvm.ctpop.v16i32
   return _mm512_popcnt_epi32(__A);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_popcnt_epi32((__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 31, 30, 1, 0, 24, 1, 25, 2, 2, 4, 2, 6, 2, 9, 2));
+
 __m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_popcnt_epi32
   // CHECK: @llvm.ctpop.v16i32
   // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_popcnt_epi32(__W, __U, __A);
 }
+
 __m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_popcnt_epi32
   // CHECK: @llvm.ctpop.v16i32
diff --git a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
index 8865c028038ae7..5d18b68e469057 100644
--- a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
@@ -1,35 +1,43 @@
 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m128i test_mm_popcnt_epi64(__m128i __A) {
   // CHECK-LABEL: @test_mm_popcnt_epi64
   // CHECK: @llvm.ctpop.v2i64
   return _mm_popcnt_epi64(__A);
 }
+TEST_CONSTEXPR(match_v2di(_mm_popcnt_epi64((__m128i)(__v2di){+5, -3}), 2, 63));
+
 __m128i test_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_popcnt_epi64
   // CHECK: @llvm.ctpop.v2i64
   // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_popcnt_epi64(__W, __U, __A);
 }
+
 __m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_popcnt_epi64
   // CHECK: @llvm.ctpop.v2i64
   // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_maskz_popcnt_epi64(__U, __A);
 }
+
 __m128i test_mm_popcnt_epi32(__m128i __A) {
   // CHECK-LABEL: @test_mm_popcnt_epi32
   // CHECK: @llvm.ctpop.v4i32
   return _mm_popcnt_epi32(__A);
 }
+TEST_CONSTEXPR(match_v4si(_mm_popcnt_epi32((__m128i)(__v4si){+5, -3, -10, +8}), 2, 31, 30, 1));
+
 __m128i test_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_popcnt_epi32
   // CHECK: @llvm.ctpop.v4i32
   // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_popcnt_epi32(__W, __U, __A);
 }
+
 __m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_popcnt_epi32
   // CHECK: @llvm.ctpop.v4i32
@@ -42,29 +50,36 @@ __m256i test_mm256_popcnt_epi64(__m256i __A) {
   // CHECK: @llvm.ctpop.v4i64
   return _mm256_popcnt_epi64(__A);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_popcnt_epi64((__m256i)(__v4di){+5, -3, -10, +8}), 2, 63, 62, 1));
+
 __m256i test_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_popcnt_epi64
   // CHECK: @llvm.ctpop.v4i64
   // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_popcnt_epi64(__W, __U, __A);
 }
+
 __m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_popcnt_epi64
   // CHECK: @llvm.ctpop.v4i64
   // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_popcnt_epi64(__U, __A);
 }
+
 __m256i test_mm256_popcnt_epi32(__m256i __A) {
   // CHECK-LABEL: @test_mm256_popcnt_epi32
   // CHECK: @llvm.ctpop.v8i32
   return _mm256_popcnt_epi32(__A);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_popcnt_epi32((__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, 1, 0, 24, 1, 25));
+
 __m256i test_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_popcnt_epi32
   // CHECK: @llvm.ctpop.v8i32
   // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_popcnt_epi32(__W, __U, __A);
 }
+
 __m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_popcnt_epi32
   // CHECK: @llvm.ctpop.v8i32
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index c3307f749942b1..22a87ce9623be8 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -73,6 +73,16 @@ constexpr bool match_m256i(__m256i _v, unsigned long long a, unsigned long long
   return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
 }
 
+constexpr bool match_v4di(__m256i _v, long long a, long long b, long long c, long long d) {
+  __v4di v = (__v4di)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int f, int g, int h) {
+  __v8si v = (__v8si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
 constexpr bool match_m512(__m512 v, float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p) {
   return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
 }
@@ -86,6 +96,16 @@ constexpr bool match_m512i(__m512i _v, unsigned long long a, unsigned long long
   return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
 }
 
+constexpr bool match_v8di(__m512i _v, long long a, long long b, long long c, long long d, long long e, long long f, long long g, long long h) {
+  __v8di v = (__v8di)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l, int m, int n, int o, int p) {
+  __v16si v = (__v16si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
 #define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
 
 #else

@RKSimon RKSimon merged commit 335cfb3 into llvm:main Nov 28, 2024
12 checks passed
@RKSimon RKSimon deleted the x86-vpopcntdq-constexpr branch November 28, 2024 17:53
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 28, 2024

LLVM Buildbot has detected a new failure on builder libc-x86_64-debian-fullbuild-dbg-asan running on libc-x86_64-debian-fullbuild while building clang at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/171/builds/11280

Here is the relevant piece of the build log for the reference
Step 4 (annotate) failure: 'python ../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py ...' (failure)
...
[==========] Running 4 tests from 1 test suite.
[ RUN      ] LlvmLibcHashTest.SanityCheck
[       OK ] LlvmLibcHashTest.SanityCheck (17 ms)
[ RUN      ] LlvmLibcHashTest.Avalanche
[       OK ] LlvmLibcHashTest.Avalanche (2067 ms)
[ RUN      ] LlvmLibcHashTest.UniformLSB
[       OK ] LlvmLibcHashTest.UniformLSB (203 ms)
[ RUN      ] LlvmLibcHashTest.UniformMSB
[       OK ] LlvmLibcHashTest.UniformMSB (138 us)
Ran 4 tests.  PASS: 4  FAIL: 0
command timed out: 1200 seconds without output running [b'python', b'../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py', b'--debug', b'--asan'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1262.162928
Step 8 (libc-unit-tests) failure: libc-unit-tests (failure)
...
[ RUN      ] LlvmLibcStrtoint64Test.InvalidBase
[       OK ] LlvmLibcStrtoint64Test.InvalidBase (16 us)
[ RUN      ] LlvmLibcStrtoint64Test.CleanBaseTenDecode
[       OK ] LlvmLibcStrtoint64Test.CleanBaseTenDecode (14 us)
[ RUN      ] LlvmLibcStrtoint64Test.MessyBaseTenDecode
[       OK ] LlvmLibcStrtoint64Test.MessyBaseTenDecode (10 us)
[ RUN      ] LlvmLibcStrtoint64Test.DecodeInOtherBases
[       OK ] LlvmLibcStrtoint64Test.DecodeInOtherBases (413 ms)
[ RUN      ] LlvmLibcStrtoint64Test.CleanBaseSixteenDecode
[       OK ] LlvmLibcStrtoint64Test.CleanBaseSixteenDecode (7 us)
[ RUN      ] LlvmLibcStrtoint64Test.MessyBaseSixteenDecode
[       OK ] LlvmLibcStrtoint64Test.MessyBaseSixteenDecode (3 us)
[ RUN      ] LlvmLibcStrtoint64Test.AutomaticBaseSelection
[       OK ] LlvmLibcStrtoint64Test.AutomaticBaseSelection (5 us)
[ RUN      ] LlvmLibcStrtouint64Test.InvalidBase
[       OK ] LlvmLibcStrtouint64Test.InvalidBase (12 us)
[ RUN      ] LlvmLibcStrtouint64Test.CleanBaseTenDecode
[       OK ] LlvmLibcStrtouint64Test.CleanBaseTenDecode (7 us)
[ RUN      ] LlvmLibcStrtouint64Test.MessyBaseTenDecode
[       OK ] LlvmLibcStrtouint64Test.MessyBaseTenDecode (6 us)
[ RUN      ] LlvmLibcStrtouint64Test.DecodeInOtherBases
[       OK ] LlvmLibcStrtouint64Test.DecodeInOtherBases (233 ms)
[ RUN      ] LlvmLibcStrtouint64Test.CleanBaseSixteenDecode
[       OK ] LlvmLibcStrtouint64Test.CleanBaseSixteenDecode (8 us)
[ RUN      ] LlvmLibcStrtouint64Test.MessyBaseSixteenDecode
[       OK ] LlvmLibcStrtouint64Test.MessyBaseSixteenDecode (4 us)
[ RUN      ] LlvmLibcStrtouint64Test.AutomaticBaseSelection
[       OK ] LlvmLibcStrtouint64Test.AutomaticBaseSelection (4 us)
Ran 14 tests.  PASS: 14  FAIL: 0
[1096/1098] Running unit test libc.test.src.time.nanosleep_test.__unit__
[==========] Running 1 test from 1 test suite.
[ RUN      ] LlvmLibcNanosleep.SmokeTest
[       OK ] LlvmLibcNanosleep.SmokeTest (104 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[1097/1098] Running unit test libc.test.src.__support.hash_test.__unit__
[==========] Running 4 tests from 1 test suite.
[ RUN      ] LlvmLibcHashTest.SanityCheck
[       OK ] LlvmLibcHashTest.SanityCheck (17 ms)
[ RUN      ] LlvmLibcHashTest.Avalanche
[       OK ] LlvmLibcHashTest.Avalanche (2067 ms)
[ RUN      ] LlvmLibcHashTest.UniformLSB
[       OK ] LlvmLibcHashTest.UniformLSB (203 ms)
[ RUN      ] LlvmLibcHashTest.UniformMSB
[       OK ] LlvmLibcHashTest.UniformMSB (138 us)
Ran 4 tests.  PASS: 4  FAIL: 0

command timed out: 1200 seconds without output running [b'python', b'../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py', b'--debug', b'--asan'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1262.162928

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants