Skip to content

Commit e9b60db

Browse files
spalltmsri
authored and committed
[HLSL] Implement elementwise popcount (llvm#108121)
Add a new elementwise popcount builtin to support the HLSL function 'countbits'. Elementwise popcount only accepts integer types. Add the HLSL intrinsic 'countbits'. Closes llvm#99094
1 parent 97b05d2 commit e9b60db

File tree

15 files changed

+369
-1
lines changed

15 files changed

+369
-1
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
667667
T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types
668668
T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types
669669
T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types
670+
T __builtin_elementwise_popcount(T x) return the number of 1 bits in x integer types
670671
T __builtin_elementwise_pow(T x, T y) return x raised to the power of y floating point types
671672
T __builtin_elementwise_bitreverse(T x) return the integer represented after reversing the bits of x integer types
672673
T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ C++ Language Changes
116116

117117
- Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.
118118

119+
- Add ``__builtin_elementwise_popcount`` builtin for integer types only.
119120

120121
C++2c Feature Support
121122
^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,6 +1322,12 @@ def ElementwiseLog10 : Builtin {
13221322
let Prototype = "void(...)";
13231323
}
13241324

1325+
def ElementwisePopcount : Builtin {
1326+
let Spellings = ["__builtin_elementwise_popcount"];
1327+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1328+
let Prototype = "void(...)";
1329+
}
1330+
13251331
def ElementwisePow : Builtin {
13261332
let Spellings = ["__builtin_elementwise_pow"];
13271333
let Attributes = [NoThrow, Const, CustomTypeChecking];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
38343834
case Builtin::BI__builtin_elementwise_floor:
38353835
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
38363836
*this, E, llvm::Intrinsic::floor, "elt.floor"));
3837+
case Builtin::BI__builtin_elementwise_popcount:
3838+
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3839+
*this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
38373840
case Builtin::BI__builtin_elementwise_roundeven:
38383841
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
38393842
*this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,77 @@ float3 cosh(float3);
650650
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh)
651651
float4 cosh(float4);
652652

653+
//===----------------------------------------------------------------------===//
654+
// count bits builtins
655+
//===----------------------------------------------------------------------===//
656+
657+
/// \fn T countbits(T Val)
658+
/// \brief Return the number of bits (per component) set in the input integer.
659+
/// \param Val The input value.
660+
661+
#ifdef __HLSL_ENABLE_16_BIT
662+
_HLSL_AVAILABILITY(shadermodel, 6.2)
663+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
664+
int16_t countbits(int16_t);
665+
_HLSL_AVAILABILITY(shadermodel, 6.2)
666+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
667+
int16_t2 countbits(int16_t2);
668+
_HLSL_AVAILABILITY(shadermodel, 6.2)
669+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
670+
int16_t3 countbits(int16_t3);
671+
_HLSL_AVAILABILITY(shadermodel, 6.2)
672+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
673+
int16_t4 countbits(int16_t4);
674+
_HLSL_AVAILABILITY(shadermodel, 6.2)
675+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
676+
uint16_t countbits(uint16_t);
677+
_HLSL_AVAILABILITY(shadermodel, 6.2)
678+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
679+
uint16_t2 countbits(uint16_t2);
680+
_HLSL_AVAILABILITY(shadermodel, 6.2)
681+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
682+
uint16_t3 countbits(uint16_t3);
683+
_HLSL_AVAILABILITY(shadermodel, 6.2)
684+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
685+
uint16_t4 countbits(uint16_t4);
686+
#endif
687+
688+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
689+
int countbits(int);
690+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
691+
int2 countbits(int2);
692+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
693+
int3 countbits(int3);
694+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
695+
int4 countbits(int4);
696+
697+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
698+
uint countbits(uint);
699+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
700+
uint2 countbits(uint2);
701+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
702+
uint3 countbits(uint3);
703+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
704+
uint4 countbits(uint4);
705+
706+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
707+
int64_t countbits(int64_t);
708+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
709+
int64_t2 countbits(int64_t2);
710+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
711+
int64_t3 countbits(int64_t3);
712+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
713+
int64_t4 countbits(int64_t4);
714+
715+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
716+
uint64_t countbits(uint64_t);
717+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
718+
uint64_t2 countbits(uint64_t2);
719+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
720+
uint64_t3 countbits(uint64_t3);
721+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
722+
uint64_t4 countbits(uint64_t4);
723+
653724
//===----------------------------------------------------------------------===//
654725
// dot product builtins
655726
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaChecking.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
27952795
if (BuiltinElementwiseMath(TheCall))
27962796
return ExprError();
27972797
break;
2798-
2798+
case Builtin::BI__builtin_elementwise_popcount:
27992799
case Builtin::BI__builtin_elementwise_bitreverse: {
28002800
if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
28012801
return ExprError();

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
570570
vf2 = __builtin_elementwise_log2(vf1);
571571
}
572572

573+
void test_builtin_elementwise_popcount(si8 vi1, si8 vi2,
574+
long long int i1, long long int i2, short si,
575+
_BitInt(31) bi1, _BitInt(31) bi2) {
576+
577+
578+
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
579+
// CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
580+
i2 = __builtin_elementwise_popcount(i1);
581+
582+
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
583+
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
584+
vi2 = __builtin_elementwise_popcount(vi1);
585+
586+
// CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
587+
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
588+
const si8 cvi2 = vi2;
589+
vi2 = __builtin_elementwise_popcount(cvi2);
590+
591+
// CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
592+
// CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
593+
// CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
594+
bi2 = __builtin_elementwise_popcount(bi1);
595+
596+
// CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
597+
// CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
598+
b = __builtin_elementwise_popcount(int_as_one);
599+
600+
// CHECK: call i32 @llvm.ctpop.i32(i32 -10)
601+
b = __builtin_elementwise_popcount(-10);
602+
603+
// CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
604+
// CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
605+
// CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
606+
// CHECK-NEXT: = trunc i32 [[RES]] to i16
607+
si = __builtin_elementwise_popcount(si);
608+
}
609+
573610
void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
574611
float4 vf1, float4 vf2) {
575612

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
4+
5+
#ifdef __HLSL_ENABLE_16_BIT
6+
// CHECK-LABEL: test_countbits_ushort
7+
// CHECK: call i16 @llvm.ctpop.i16
8+
uint16_t test_countbits_ushort(uint16_t p0)
9+
{
10+
return countbits(p0);
11+
}
12+
// CHECK-LABEL: test_countbits_ushort2
13+
// CHECK: call <2 x i16> @llvm.ctpop.v2i16
14+
uint16_t2 test_countbits_ushort2(uint16_t2 p0)
15+
{
16+
return countbits(p0);
17+
}
18+
// CHECK-LABEL: test_countbits_ushort3
19+
// CHECK: call <3 x i16> @llvm.ctpop.v3i16
20+
uint16_t3 test_countbits_ushort3(uint16_t3 p0)
21+
{
22+
return countbits(p0);
23+
}
24+
// CHECK-LABEL: test_countbits_ushort4
25+
// CHECK: call <4 x i16> @llvm.ctpop.v4i16
26+
uint16_t4 test_countbits_ushort4(uint16_t4 p0)
27+
{
28+
return countbits(p0);
29+
}
30+
#endif
31+
32+
// CHECK-LABEL: test_countbits_uint
33+
// CHECK: call i32 @llvm.ctpop.i32
34+
int test_countbits_uint(uint p0)
35+
{
36+
return countbits(p0);
37+
}
38+
// CHECK-LABEL: test_countbits_uint2
39+
// CHECK: call <2 x i32> @llvm.ctpop.v2i32
40+
uint2 test_countbits_uint2(uint2 p0)
41+
{
42+
return countbits(p0);
43+
}
44+
// CHECK-LABEL: test_countbits_uint3
45+
// CHECK: call <3 x i32> @llvm.ctpop.v3i32
46+
uint3 test_countbits_uint3(uint3 p0)
47+
{
48+
return countbits(p0);
49+
}
50+
// CHECK-LABEL: test_countbits_uint4
51+
// CHECK: call <4 x i32> @llvm.ctpop.v4i32
52+
uint4 test_countbits_uint4(uint4 p0)
53+
{
54+
return countbits(p0);
55+
}
56+
57+
// CHECK-LABEL: test_countbits_long
58+
// CHECK: call i64 @llvm.ctpop.i64
59+
uint64_t test_countbits_long(uint64_t p0)
60+
{
61+
return countbits(p0);
62+
}
63+
// CHECK-LABEL: test_countbits_long2
64+
// CHECK: call <2 x i64> @llvm.ctpop.v2i64
65+
uint64_t2 test_countbits_long2(uint64_t2 p0)
66+
{
67+
return countbits(p0);
68+
}
69+
// CHECK-LABEL: test_countbits_long3
70+
// CHECK: call <3 x i64> @llvm.ctpop.v3i64
71+
uint64_t3 test_countbits_long3(uint64_t3 p0)
72+
{
73+
return countbits(p0);
74+
}
75+
// CHECK-LABEL: test_countbits_long4
76+
// CHECK: call <4 x i64> @llvm.ctpop.v4i64
77+
uint64_t4 test_countbits_long4(uint64_t4 p0)
78+
{
79+
return countbits(p0);
80+
}

clang/test/Sema/builtins-elementwise-math.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,39 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv,
505505
// expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
506506
}
507507

508+
void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
509+
510+
struct Foo s = __builtin_elementwise_popcount(i);
511+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
512+
513+
i = __builtin_elementwise_popcount();
514+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
515+
516+
i = __builtin_elementwise_popcount(f);
517+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
518+
519+
i = __builtin_elementwise_popcount(f, f);
520+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
521+
522+
u = __builtin_elementwise_popcount(d);
523+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
524+
525+
v = __builtin_elementwise_popcount(v);
526+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
527+
528+
int2 i2 = __builtin_elementwise_popcount(iv);
529+
// expected-error@-1 {{initializing 'int2' (vector of 2 'int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
530+
531+
iv = __builtin_elementwise_popcount(i2);
532+
// expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'int2' (vector of 2 'int' values)}}
533+
534+
unsigned3 u3 = __builtin_elementwise_popcount(iv);
535+
// expected-error@-1 {{initializing 'unsigned3' (vector of 3 'unsigned int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
536+
537+
iv = __builtin_elementwise_popcount(u3);
538+
// expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'unsigned3' (vector of 3 'unsigned int' values)}}
539+
}
540+
508541
void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
509542
i = __builtin_elementwise_pow(p, d);
510543
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}

clang/test/Sema/countbits-errors.hlsl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -finclude-default-header
2+
// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only
3+
// -disable-llvm-passes -verify
4+
5+
double2 test_int_builtin(double2 p0) {
6+
return __builtin_hlsl_elementwise_countbits(p0);
7+
// expected-error@-1 {{passing 'double2' (aka 'vector<double, 2>') to
8+
// parameter of incompatible type
9+
// '__attribute__((__vector_size__(2 * sizeof(int)))) int'
10+
// (vector of 2 'int' values)}}
11+
}
12+
13+
float test_ambiguous(float p0) {
14+
return countbits(p0);
15+
// expected-error@-1 {{call to 'countbits' is ambiguous}}
16+
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
17+
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
18+
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
19+
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
20+
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
21+
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
22+
}
23+
24+
float test_float_builtin(float p0) {
25+
return __builtin_hlsl_elementwise_countbits(p0);
26+
// expected-error@-1 {{passing 'double' to parameter of incompatible type
27+
// 'int'}}
28+
}

clang/test/SemaCXX/builtins-elementwise-math.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() {
269269
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value);
270270
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value);
271271
}
272+
273+
void test_builtin_elementwise_popcount() {
274+
const int a = 2;
275+
int b = 1;
276+
static_assert(!is_const<decltype(__builtin_elementwise_popcount(a))>::value);
277+
static_assert(!is_const<decltype(__builtin_elementwise_popcount(b))>::value);
278+
}
279+
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// RUN: %clang_cc1 -finclude-default-header
2+
// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only
3+
// -disable-llvm-passes -verify -verify-ignore-unexpected
4+
5+
6+
double test_int_builtin(double p0) {
7+
return countbits(p0);
8+
// expected-error@-1 {{call to 'countbits' is ambiguous}}
9+
}
10+
11+
double2 test_int_builtin_2(double2 p0) {
12+
return __builtin_elementwise_popcount(p0);
13+
// expected-error@-1 {{1st argument must be a vector of integers
14+
// (was 'double2' (aka 'vector<double, 2>'))}}
15+
}
16+
17+
double test_int_builtin_3(float p0) {
18+
return __builtin_elementwise_popcount(p0);
19+
// expected-error@-1 {{1st argument must be a vector of integers
20+
// (was 'float')}}
21+
}

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,17 @@ def Rbits : DXILOp<30, unary> {
553553
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
554554
}
555555

556+
def CBits : DXILOp<31, unary> {
557+
let Doc = "Returns the number of 1 bits in the specified value.";
558+
let LLVMIntrinsic = int_ctpop;
559+
let arguments = [OverloadTy];
560+
let result = OverloadTy;
561+
let overloads =
562+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
563+
let stages = [Stages<DXIL1_0, [all_stages]>];
564+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
565+
}
566+
556567
def FMax : DXILOp<35, binary> {
557568
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
558569
let LLVMIntrinsic = int_maxnum;

0 commit comments

Comments
 (0)