Skip to content

Commit 64908e7

Browse files
author
Raghuveer Devulapalli
authored
Merge pull request #60 from sterrettm2/avx2-32bit
Adds support for AVX2 for 32-bit types for quicksort and quickselect
2 parents 49769c0 + 9810e05 commit 64908e7

20 files changed

+1114
-174
lines changed

_clang-format

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ PenaltyExcessCharacter: 1000000
7474
PenaltyReturnTypeOnItsOwnLine: 60
7575
PointerAlignment: Right
7676
ReflowComments: false
77-
SortIncludes: true
77+
SortIncludes: false
7878
SortUsingDeclarations: true
7979
SpaceAfterCStyleCast: false
8080
SpaceAfterTemplateKeyword: true

examples/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CXX ?= g++-12
22
CFLAGS = -I../src -std=c++17 -O3
3-
EXE = argsort kvsort qsortfp16 qsort16 qsort32 qsort64
3+
EXE = qsort32avx2 argsort kvsort qsortfp16 qsort16 qsort32 qsort64
44

55
default: all
66
all : $(EXE)
@@ -14,6 +14,9 @@ qsort16: avx512-16bit-qsort.cpp
1414
qsort32: avx512-32bit-qsort.cpp
1515
$(CXX) -o qsort32 -march=skylake-avx512 $(CFLAGS) avx512-32bit-qsort.cpp
1616

17+
qsort32avx2: avx2-32bit-qsort.cpp
18+
$(CXX) -o qsort32avx2 -march=haswell $(CFLAGS) avx2-32bit-qsort.cpp
19+
1720
qsort64: avx512-64bit-qsort.cpp
1821
$(CXX) -o qsort64 -march=skylake-avx512 $(CFLAGS) avx512-64bit-qsort.cpp
1922

examples/avx2-32bit-qsort.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#include "avx2-32bit-qsort.hpp"
2+
3+
int main() {
4+
const int size = 1000;
5+
float arr[size];
6+
avx2_qsort(arr, size);
7+
avx2_qselect(arr, 10, size);
8+
avx2_partial_qsort(arr, 10, size);
9+
return 0;
10+
}

lib/meson.build

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
libtargets = []
22

3+
if cpp.has_argument('-march=haswell')
4+
libtargets += static_library('libavx',
5+
files(
6+
'x86simdsort-avx2.cpp',
7+
),
8+
include_directories : [src],
9+
cpp_args : ['-march=haswell', flags_hide_symbols],
10+
)
11+
endif
12+
313
if cpp.has_argument('-march=skylake-avx512')
414
libtargets += static_library('libskx',
515
files(

lib/x86simdsort-avx2.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// AVX2 specific routines:
2+
#include "avx2-32bit-qsort.hpp"
3+
#include "x86simdsort-internal.h"
4+
5+
#define DEFINE_ALL_METHODS(type) \
6+
template <> \
7+
void qsort(type *arr, size_t arrsize) \
8+
{ \
9+
avx2_qsort(arr, arrsize); \
10+
} \
11+
template <> \
12+
void qselect(type *arr, size_t k, size_t arrsize, bool hasnan) \
13+
{ \
14+
avx2_qselect(arr, k, arrsize, hasnan); \
15+
} \
16+
template <> \
17+
void partial_qsort(type *arr, size_t k, size_t arrsize, bool hasnan) \
18+
{ \
19+
avx2_partial_qsort(arr, k, arrsize, hasnan); \
20+
}
21+
22+
namespace xss {
23+
namespace avx2 {
24+
DEFINE_ALL_METHODS(uint32_t)
25+
DEFINE_ALL_METHODS(int32_t)
26+
DEFINE_ALL_METHODS(float)
27+
} // namespace avx2
28+
} // namespace xss

lib/x86simdsort.cpp

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
static int check_cpu_feature_support(std::string_view cpufeature)
99
{
10-
const char* disable_avx512 = std::getenv("XSS_DISABLE_AVX512");
10+
const char *disable_avx512 = std::getenv("XSS_DISABLE_AVX512");
1111

1212
if ((cpufeature == "avx512_spr") && (!disable_avx512))
1313
#ifdef __FLT16_MAX__
@@ -100,34 +100,40 @@ dispatch_requested(std::string_view cpurequested,
100100
}
101101

102102
/* runtime dispatch mechanism */
103-
#define DISPATCH(func, TYPE, ...) \
103+
#define DISPATCH(func, TYPE, ISA) \
104104
DECLARE_INTERNAL_##func(TYPE) static __attribute__((constructor)) void \
105105
CAT(CAT(resolve_, func), TYPE)(void) \
106106
{ \
107107
CAT(CAT(internal_, func), TYPE) = &xss::scalar::func<TYPE>; \
108108
__builtin_cpu_init(); \
109-
std::string_view preferred_cpu = find_preferred_cpu({__VA_ARGS__}); \
110-
if constexpr (dispatch_requested("avx512", {__VA_ARGS__})) { \
109+
std::string_view preferred_cpu = find_preferred_cpu(ISA); \
110+
if constexpr (dispatch_requested("avx512", ISA)) { \
111111
if (preferred_cpu.find("avx512") != std::string_view::npos) { \
112112
CAT(CAT(internal_, func), TYPE) = &xss::avx512::func<TYPE>; \
113113
return; \
114114
} \
115115
} \
116-
else if constexpr (dispatch_requested("avx2", {__VA_ARGS__})) { \
116+
if constexpr (dispatch_requested("avx2", ISA)) { \
117117
if (preferred_cpu.find("avx2") != std::string_view::npos) { \
118118
CAT(CAT(internal_, func), TYPE) = &xss::avx2::func<TYPE>; \
119119
return; \
120120
} \
121121
} \
122122
}
123123

124+
#define ISA_LIST(...) \
125+
std::initializer_list<std::string_view> \
126+
{ \
127+
__VA_ARGS__ \
128+
}
129+
124130
namespace x86simdsort {
125131
#ifdef __FLT16_MAX__
126-
DISPATCH(qsort, _Float16, "avx512_spr")
127-
DISPATCH(qselect, _Float16, "avx512_spr")
128-
DISPATCH(partial_qsort, _Float16, "avx512_spr")
129-
DISPATCH(argsort, _Float16, "none")
130-
DISPATCH(argselect, _Float16, "none")
132+
DISPATCH(qsort, _Float16, ISA_LIST("avx512_spr"))
133+
DISPATCH(qselect, _Float16, ISA_LIST("avx512_spr"))
134+
DISPATCH(partial_qsort, _Float16, ISA_LIST("avx512_spr"))
135+
DISPATCH(argsort, _Float16, ISA_LIST("none"))
136+
DISPATCH(argselect, _Float16, ISA_LIST("none"))
131137
#endif
132138

133139
#define DISPATCH_ALL(func, ISA_16BIT, ISA_32BIT, ISA_64BIT) \
@@ -140,10 +146,25 @@ DISPATCH(argselect, _Float16, "none")
140146
DISPATCH(func, uint64_t, ISA_64BIT) \
141147
DISPATCH(func, double, ISA_64BIT)
142148

143-
DISPATCH_ALL(qsort, ("avx512_icl"), ("avx512_skx"), ("avx512_skx"))
144-
DISPATCH_ALL(qselect, ("avx512_icl"), ("avx512_skx"), ("avx512_skx"))
145-
DISPATCH_ALL(partial_qsort, ("avx512_icl"), ("avx512_skx"), ("avx512_skx"))
146-
DISPATCH_ALL(argsort, "none", "avx512_skx", "avx512_skx")
147-
DISPATCH_ALL(argselect, "none", "avx512_skx", "avx512_skx")
149+
DISPATCH_ALL(qsort,
150+
(ISA_LIST("avx512_icl")),
151+
(ISA_LIST("avx512_skx", "avx2")),
152+
(ISA_LIST("avx512_skx")))
153+
DISPATCH_ALL(qselect,
154+
(ISA_LIST("avx512_icl")),
155+
(ISA_LIST("avx512_skx", "avx2")),
156+
(ISA_LIST("avx512_skx")))
157+
DISPATCH_ALL(partial_qsort,
158+
(ISA_LIST("avx512_icl")),
159+
(ISA_LIST("avx512_skx", "avx2")),
160+
(ISA_LIST("avx512_skx")))
161+
DISPATCH_ALL(argsort,
162+
(ISA_LIST("none")),
163+
(ISA_LIST("avx512_skx")),
164+
(ISA_LIST("avx512_skx")))
165+
DISPATCH_ALL(argselect,
166+
(ISA_LIST("none")),
167+
(ISA_LIST("avx512_skx")),
168+
(ISA_LIST("avx512_skx")))
148169

149170
} // namespace x86simdsort

0 commit comments

Comments
 (0)