Skip to content

Commit 89878e2

Browse files
authored
This patch adds a dispatcher for x86. It also adds tester for the fun… (pytorch#32)
* This patch adds a dispatcher for x86. It also adds a tester for the functions that go through the dispatcher. Five dummy functions (xsinpif_u05, xtgammf_u1, xlgammf_u1, xerff_u1 and xerfcf_u15) are added because they are referenced from the dispatcher; I will implement these functions soon. The dispatcher is a mechanism that lets the library automatically detect the available features of the CPU and use the fastest implementation among the functions with the same specification. The patch adds a dispatcher for 128-bit computation that selects among the SSE2, SSE4 and AVX2 implementations, and a dispatcher for 256-bit computation that selects among the AVX, FMA4 and AVX2 implementations.
1 parent 0c46698 commit 89878e2

File tree

11 files changed

+620
-68
lines changed

11 files changed

+620
-68
lines changed

src/libm-tester/Makefile

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ endif
4242

4343
ifeq ($(ARCH), x86_64)
4444

45-
TARGET_IUT += iutsse2 iutsse4 iutavx
45+
TARGET_IUT += iutsse2 iutsse4 iutavx iutdsp128 iutdsp256
4646
TARGET_TESTER += tester2sse2dp tester2sse2sp tester2sse4dp tester2sse4sp tester2avxdp tester2avxsp
4747

4848
ifeq ($(ENABLEAVX2), 1)
@@ -194,6 +194,20 @@ tester2avx512fsp : ../../lib/libsleef.$(DLLSUFFIX) ../../include/sleef.h tester2
194194

195195
#
196196

197+
# Dispatcher testers. iutdsp128 and iutdsp256 compile iutsimd.c and
# testerutil.c together with iutsimdmain.o, defining ENABLE_DSP128 or
# ENABLE_DSP256 respectively, and link against libsleef (-lsleef) and libm.
# iutdsp256 is additionally compiled with -mavx.
# Each tester depends on a rename header that lives in ../libm; the
# prerequisite-less rules below hand its generation to a recursive make in
# that directory (they only fire when the header does not exist yet).
# NOTE(review): ../../include/sleef.h is listed twice in each prerequisite
# list; the second mention looks redundant -- confirm before removing.
iutdsp128 : ../../lib/libsleef.$(DLLSUFFIX) ../../include/sleef.h iutsimdmain.o iutsimd.c testerutil.c ../libm/renamedsp128.h ../../include/sleef.h
198+
$(CC) $(CFLAGS) -DENABLE_DSP128 iutsimdmain.o iutsimd.c testerutil.c -o iutdsp128 -lsleef -lm $(LIB)
199+
200+
../libm/renamedsp128.h :
201+
+"$(MAKE)" --directory=../libm renamedsp128.h
202+
203+
iutdsp256 : ../../lib/libsleef.$(DLLSUFFIX) ../../include/sleef.h iutsimdmain.o iutsimd.c testerutil.c ../libm/renamedsp256.h ../../include/sleef.h
204+
$(CC) $(CFLAGS) -DENABLE_DSP256 -mavx iutsimdmain.o iutsimd.c testerutil.c -o iutdsp256 -lsleef -lm $(LIB)
205+
206+
../libm/renamedsp256.h :
207+
+"$(MAKE)" --directory=../libm renamedsp256.h
208+
209+
#
210+
197211
iutneon32 : ../../lib/libsleef.$(DLLSUFFIX) ../../include/sleef.h iutsimdmain.o iutsimd.c testerutil.c ../libm/renameneon32.h ../../include/sleef.h
198212
$(CC) $(CFLAGS) -DENABLE_NEON32 -mfpu=neon iutsimdmain.o iutsimd.c testerutil.c -o iutneon32 -lsleef -lm $(LIB)
199213

@@ -283,7 +297,7 @@ clean :
283297
rm -f *~ a.out *.obj *.lib *.dll *.exp *.exe *.stackdump
284298
rm -rf *.dSYM *.dylib
285299
rm -f *.so *.so.* *.a *.s *.o
286-
rm -f iut iutsse2 iutsse4 iutavx iutfma4 iutavx2 iutavx2128 iutavx512f iutneon32 iutadvsimd
300+
rm -f iut iutsse2 iutsse4 iutavx iutfma4 iutavx2 iutavx2128 iutavx512f iutneon32 iutadvsimd iutdsp128 iutdsp256
287301
rm -f tester tester2vecextdp tester2vecextsp iutvecext tester2purecdp tester2purecsp iutpurec
288302
rm -f tester2dp tester2sp tester2qp tester2ld tester2sse2dp tester2sse2sp tester2fma4dp tester2fma4sp tester2avxdp tester2avxsp tester2avx2dp tester2avx2sp tester2avx512fdp tester2avx512fsp tester2sse4dp tester2sse4sp
289303
rm -f tester2advsimddp tester2advsimdsp

src/libm-tester/Makefile.vc

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ CFLAGS+=/fp:strict /I..\common /I..\arch /I../../include /I../libm
33

44
#
55

6-
TARGET_IUT = iut.exe iutsse2.exe iutsse4.exe iutavx.exe iutavx2.exe iutavx2128.exe
6+
TARGET_IUT = iut.exe iutsse2.exe iutsse4.exe iutavx.exe iutavx2.exe iutavx2128.exe iutdsp128.exe iutdsp256.exe
77

88
.PHONY: all
99
all : $(TARGET_IUT)
@@ -60,6 +60,20 @@ iutavx2128.exe : ../../lib/libsleef.lib ../../include/sleef.h iutsimdmain.obj iu
6060

6161
#
6262

63+
# Dispatcher testers for the Visual C++ build. iutdsp128.exe and
# iutdsp256.exe compile iutsimd.c and testerutil.c together with
# iutsimdmain.obj and link ../../lib/libsleef.lib directly.
# The 128-bit tester defines ENABLE_DSP128, __SSE2__ and LOGVECTLENDP=1;
# the 256-bit tester defines ENABLE_DSP256 and LOGVECTLENDP=2 and is built
# with /arch:AVX.
# The rename headers are produced by a recursive make in ../libm that is
# pointed at Makefile.vc; those rules have no prerequisites, so they only
# fire when the header does not exist yet.
# NOTE(review): ../../include/sleef.h is listed twice in each prerequisite
# list; the second mention looks redundant -- confirm before removing.
iutdsp128.exe : ../../lib/libsleef.lib ../../include/sleef.h iutsimdmain.obj iutsimd.c testerutil.c ../libm/renamedsp128.h ../../include/sleef.h
64+
$(CC) $(CFLAGS) /DENABLE_DSP128 /D__SSE2__ /DLOGVECTLENDP=1 iutsimdmain.obj iutsimd.c testerutil.c ../../lib/libsleef.lib /Feiutdsp128.exe
65+
66+
../libm/renamedsp128.h :
67+
+"$(MAKE)" --directory=../libm -f Makefile.vc renamedsp128.h
68+
69+
iutdsp256.exe : ../../lib/libsleef.lib ../../include/sleef.h iutsimdmain.obj iutsimd.c testerutil.c ../libm/renamedsp256.h ../../include/sleef.h
70+
$(CC) $(CFLAGS) /DENABLE_DSP256 /DLOGVECTLENDP=2 /arch:AVX iutsimdmain.obj iutsimd.c testerutil.c ../../lib/libsleef.lib /Feiutdsp256.exe
71+
72+
../libm/renamedsp256.h :
73+
+"$(MAKE)" --directory=../libm -f Makefile.vc renamedsp256.h
74+
75+
#
76+
6377
iutpurec.exe : ../libm/sleefsimddp.c ../libm/sleefsimdsp.c iutsimdmain.obj iutsimd.c testerutil.c
6478
$(CC) $(CFLAGS) /DENABLE_PUREC ../libm/sleefsimddp.c ../libm/sleefsimdsp.c iutsimdmain.obj iutsimd.c testerutil.c /Feiutpurec.exe
6579

@@ -76,7 +90,7 @@ iutpurec.exe : ../libm/sleefsimddp.c ../libm/sleefsimdsp.c iutsimdmain.obj iutsi
7690
.PHONY: clean
7791
clean :
7892
rm -f *~ a.out
79-
rm -f iut iutsse2 iutsse4 iutavx iutfma4 iutavx2 iutavx2128 iutavx512f
93+
rm -f iut iutsse2 iutsse4 iutavx iutfma4 iutavx2 iutavx2128 iutavx512f iutdsp128 iutdsp256
8094
rm -f tester tester2vecextdp tester2vecextsp iutvecext tester2purecdp tester2purecsp iutpurec
8195
rm -f tester2dp tester2sp tester2qp tester2ld tester2sse2dp tester2sse2sp tester2fma4dp tester2fma4sp tester2avxdp tester2avxsp tester2avx2dp tester2avx2sp tester2avx512fdp tester2avx512fsp
8296

src/libm-tester/iutsimd.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,22 @@ typedef Sleef_float32x4_t_2 vfloat2;
107107
#include "norename.h"
108108
#endif
109109

110+
// Dispatcher test configurations.
// ENABLE_DSP128: uses the SSE2 helper header together with the names from
// renamedsp128.h, and maps vdouble2 / vfloat2 to the __m128d / __m128 pair
// types.
// ENABLE_DSP256: uses the AVX helper header together with the names from
// renamedsp256.h, and maps vdouble2 / vfloat2 to the __m256d / __m256 pair
// types.
// NOTE(review): the meaning of the CONFIG values (2 and 1) is defined by the
// helper headers, which are not visible here -- confirm against them.
#ifdef ENABLE_DSP128
111+
#define CONFIG 2
112+
#include "helpersse2.h"
113+
#include "renamedsp128.h"
114+
typedef Sleef___m128d_2 vdouble2;
115+
typedef Sleef___m128_2 vfloat2;
116+
#endif
117+
118+
#ifdef ENABLE_DSP256
119+
#define CONFIG 1
120+
#include "helperavx.h"
121+
#include "renamedsp256.h"
122+
typedef Sleef___m256d_2 vdouble2;
123+
typedef Sleef___m256_2 vfloat2;
124+
#endif
125+
110126
//
111127

112128
#ifdef ENABLE_DP

src/libm/Makefile

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ endif
2626
#
2727

2828
ifeq ($(ARCH), x86_64)
29-
OBJ+=sleefdpsse2.o sleefdpavx.o sleefspsse2.o sleefspavx.o sleefdpsse4.o sleefspsse4.o
29+
OBJ+=dispsse.o dispavx.o sleefdpsse2.o sleefdpavx.o sleefspsse2.o sleefspavx.o sleefdpsse4.o sleefspsse4.o
3030
OBJ2+=sleefgdpsse2.o sleefgdpavx.o sleefgspsse2.o sleefgspavx.o
3131

3232
ifeq ($(ENABLEAVX2), 1)
@@ -68,29 +68,31 @@ ifeq ($(ARCH), x86_64)
6868
sleef.h : mkrename
6969
cp sleeflibm.h.org sleef.h
7070
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
71-
./mkrename sse2 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
72-
./mkrename sse4 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
73-
./mkrename avx2128 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
74-
./mkrename avx 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ >> sleef.h
75-
./mkrename fma4 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __FMA4__ >> sleef.h
76-
./mkrename avx2 4 8 __m256d __m256 __m128i __m256i __AVX2__ >> sleef.h
77-
./mkrename avx512f 8 16 __m512d __m512 __m256i __m512i __AVX512F__ >> sleef.h
71+
./mkrename 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
72+
./mkrename 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2 >> sleef.h
73+
./mkrename 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4 >> sleef.h
74+
./mkrename 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128 >> sleef.h
75+
./mkrename 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ >> sleef.h
76+
./mkrename 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ avx >> sleef.h
77+
./mkrename 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ fma4 >> sleef.h
78+
./mkrename 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2 >> sleef.h
79+
./mkrename 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512f >> sleef.h
7880
echo '#undef IMPORT' >> sleef.h
7981
echo '#endif' >> sleef.h
8082
cp sleef.h ../../include
8183
else ifeq ($(ARCH), arm)
8284
sleef.h : mkrename
8385
cp sleeflibm.h.org sleef.h
8486
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
85-
./mkrename neon 2 4 - float32x4_t int32x4_t int32x4_t __ARM_NEON__ >> sleef.h
87+
./mkrename 2 4 - float32x4_t int32x4_t int32x4_t __ARM_NEON__ neon >> sleef.h
8688
echo '#undef IMPORT' >> sleef.h
8789
echo '#endif' >> sleef.h
8890
cp sleef.h ../../include
8991
else ifeq ($(ARCH), aarch64)
9092
sleef.h : mkrename
9193
cp sleeflibm.h.org sleef.h
9294
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
93-
./mkrename advsimd 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON >> sleef.h
95+
./mkrename 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON advsimd >> sleef.h
9496
echo '#undef IMPORT' >> sleef.h
9597
echo '#endif' >> sleef.h
9698
cp sleef.h ../../include
@@ -134,7 +136,7 @@ sleefspsse2.o : sleefsimdsp.c renamesse2.h
134136

135137
renamesse2.h : mkrename
136138
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
137-
./mkrename sse2 2 4 > renamesse2.h
139+
./mkrename 2 4 sse2 > renamesse2.h
138140

139141
#
140142

@@ -158,7 +160,34 @@ sleefspsse4.o : sleefsimdsp.c renamesse4.h
158160

159161
renamesse4.h : mkrename
160162
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
161-
./mkrename sse4 2 4 > renamesse4.h
163+
./mkrename 2 4 sse4 > renamesse4.h
164+
165+
##
166+
167+
# Extra preprocessor flags handed to the dispatcher objects (dispavx.o and
# dispsse.o). A backend is advertised to the dispatcher sources only when
# its build switch equals 1.
# The switches must be referenced as $(NAME): a bare $ENABLEFMA4 is read by
# make as $(E) followed by the literal text "NABLEFMA4", so the comparison
# with 1 could never succeed and -DENABLE_FMA4 would never be added.
DISPAVXOPT=
168+
ifeq ($(ENABLEFMA4), 1)
169+
DISPAVXOPT+=-DENABLE_FMA4
170+
endif
171+
ifeq ($(ENABLEAVX2), 1)
172+
DISPAVXOPT+=-DENABLE_AVX2
173+
endif
174+
175+
# Dispatcher sources and objects.
# dispavx.c and dispsse.c are generated files: each starts as a copy of its
# hand-written template (*.c.org) and then has the output of the mkdisp
# generator appended to it.
# mkdisp receives two numbers, three vector type names and three
# implementation suffixes. Presumably the numbers are the double/float lane
# counts and the suffixes are the candidate backends (the commit description
# says the 256-bit dispatcher selects from AVX, FMA4 and AVX2 and the 128-bit
# one from SSE2, SSE4 and AVX2) -- confirm against mkdisp.c.
# mkdisp itself is built from mkdisp.c with -Wall.
# Both objects are compiled with $(SHAREDFLAGS) and $(DISPAVXOPT); dispavx.o
# additionally gets -mavx. Both also depend on sleef.h.
dispavx.c : mkdisp
176+
cp dispavx.c.org dispavx.c
177+
./mkdisp 4 8 __m256d __m256 __m128i avx fma4 avx2 >> dispavx.c
178+
179+
dispsse.c : mkdisp
180+
cp dispsse.c.org dispsse.c
181+
./mkdisp 2 4 __m128d __m128 __m128i sse2 sse4 avx2128 >> dispsse.c
182+
183+
mkdisp : mkdisp.c
184+
$(CC) $(CFLAGS) -Wall mkdisp.c -o mkdisp
185+
186+
dispavx.o : dispavx.c sleef.h
187+
$(CC) $(CFLAGS) $(SHAREDFLAGS) $(DISPAVXOPT) -mavx dispavx.c -c -o dispavx.o
188+
189+
dispsse.o : dispsse.c sleef.h
190+
$(CC) $(CFLAGS) $(SHAREDFLAGS) $(DISPAVXOPT) dispsse.c -c -o dispsse.o
162191

163192
##
164193

@@ -170,7 +199,7 @@ sleefspavx.o : sleefsimdsp.c renameavx.h
170199

171200
renameavx.h : mkrename
172201
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
173-
./mkrename avx 4 8 > renameavx.h
202+
./mkrename 4 8 avx > renameavx.h
174203

175204
#
176205

@@ -194,7 +223,7 @@ sleefspfma4.o : sleefsimdsp.c renamefma4.h
194223

195224
renamefma4.h : mkrename
196225
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
197-
./mkrename fma4 4 8 > renamefma4.h
226+
./mkrename 4 8 fma4 > renamefma4.h
198227

199228
##
200229

@@ -206,7 +235,7 @@ sleefspavx2.o : sleefsimdsp.c renameavx2.h
206235

207236
renameavx2.h : mkrename
208237
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
209-
./mkrename avx2 4 8 > renameavx2.h
238+
./mkrename 4 8 avx2 > renameavx2.h
210239

211240
#
212241

@@ -230,7 +259,7 @@ sleefspavx2128.o : sleefsimdsp.c renameavx2128.h
230259

231260
renameavx2128.h : mkrename
232261
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
233-
./mkrename avx2128 2 4 > renameavx2128.h
262+
./mkrename 2 4 avx2128 > renameavx2128.h
234263

235264
##
236265

@@ -242,7 +271,7 @@ sleefspavx512f.o : sleefsimdsp.c renameavx512f.h
242271

243272
renameavx512f.h : mkrename
244273
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
245-
./mkrename avx512f 8 16 > renameavx512f.h
274+
./mkrename 8 16 avx512f > renameavx512f.h
246275

247276
#
248277

@@ -263,7 +292,7 @@ sleefspneon32.o : sleefsimdsp.c renameneon32.h
263292

264293
renameneon32.h : mkrename
265294
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
266-
./mkrename neon 2 4 > renameneon32.h
295+
./mkrename 2 4 neon > renameneon32.h
267296

268297
##
269298

@@ -275,7 +304,7 @@ sleefdpadvsimd.o : sleefsimddp.c ../arch/helperadvsimd.h
275304

276305
renameadvsimd.h : mkrename
277306
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
278-
./mkrename advsimd 2 4 > renameadvsimd.h
307+
./mkrename 2 4 advsimd > renameadvsimd.h
279308

280309
#
281310

@@ -291,9 +320,20 @@ renameadvsimd_gnuabi.h : mkrename_gnuabi
291320

292321
#
293322

323+
# Rename headers used by the dispatcher testers.
# Unlike the per-backend rename headers in this Makefile, mkrename is called
# here with only the two numeric arguments and no backend suffix.
# The $(FLOCK) line runs before mkrename, as in the other rename rules of
# this Makefile; it only echoes a message under the lock.
# NOTE(review): presumably the suffix-less invocation emits the unsuffixed
# (dispatcher) names -- confirm against mkrename.c.
renamedsp128.h : mkrename
324+
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
325+
./mkrename 2 4 > renamedsp128.h
326+
327+
renamedsp256.h : mkrename
328+
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
329+
./mkrename 4 8 > renamedsp256.h
330+
331+
#
332+
294333
.PHONY: clean
295334
clean :
296335
rm -f *~ *.o *.s libm.a libgm.a sleef.h a.out *.lock OBJ.txt OBJ2.txt
297336
rm -f *.obj *.lib *.dll *.exp *.exe
298-
rm -f mkrename renamesse2.h renamesse4.h renameavx.h renamefma4.h renameavx2.h renameavx2128.h renameavx512f.h renameneon32.h renameadvsimd.h
337+
rm -f mkrename renamesse2.h renamesse4.h renameavx.h renamefma4.h renameavx2.h renameavx2128.h renameavx512f.h renameneon32.h renameadvsimd.h renamedsp128.h renamedsp256.h
338+
rm -f mkdisp dispavx.c dispsse.c
299339
rm -f mkrename_gnuabi renamesse2_gnuabi.h renameavx_gnuabi.h renamefma4_gnuabi.h renameavx2_gnuabi.h renameavx512f_gnuabi.h renameneon32_gnuabi.h renameadvsimd_gnuabi.h

src/libm/Makefile.vc

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ CFLAGS+=/TC /I..\common /I..\arch
33
OPTFLAGS=/O2
44

55
OBJ=sleefdp.obj sleefsp.obj sleefld.obj
6+
OBJ+=dispavx.obj dispsse.obj
67
OBJ+=sleefdpsse2.obj sleefdpsse4.obj sleefdpavx.obj sleefdpavx2.obj sleefdpavx2128.obj
78
OBJ+=sleefspsse2.obj sleefspsse4.obj sleefspavx.obj sleefspavx2.obj sleefspavx2128.obj
89

@@ -27,11 +28,13 @@ OBJ2.txt : $(OBJ2)
2728

2829
sleef.h : mkrename.exe
2930
cp sleeflibm.h.org sleef.h
30-
./mkrename.exe sse2 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
31-
./mkrename.exe sse4 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
32-
./mkrename.exe avx2128 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
33-
./mkrename.exe avx 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ >> sleef.h
34-
./mkrename.exe avx2 4 8 __m256d __m256 __m128i __m256i __AVX2__ >> sleef.h
31+
./mkrename.exe 2 4 __m128d __m128 __m128i __m128i __SSE2__ >> sleef.h
32+
./mkrename.exe 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2 >> sleef.h
33+
./mkrename.exe 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4 >> sleef.h
34+
./mkrename.exe 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128 >> sleef.h
35+
./mkrename.exe 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ >> sleef.h
36+
./mkrename.exe 4 8 __m256d __m256 __m128i 'struct { __m128i x, y; }' __AVX__ avx >> sleef.h
37+
./mkrename.exe 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2 >> sleef.h
3538
echo '#undef IMPORT' >> sleef.h
3639
echo '#endif' >> sleef.h
3740
cp sleef.h ../../include
@@ -66,7 +69,7 @@ sleefspsse2.obj : sleefsimdsp.c renamesse2.h
6669
$(CC) $(CFLAGS) $(OPTFLAGS) -DDORENAME -DENABLE_SSE2 -D__SSE2__ sleefsimdsp.c /c /Fosleefspsse2.obj
6770

6871
renamesse2.h : mkrename.exe
69-
./mkrename.exe sse2 2 4 > renamesse2.h
72+
./mkrename.exe 2 4 sse2 > renamesse2.h
7073

7174
#
7275

@@ -88,7 +91,7 @@ sleefspsse4.obj : sleefsimdsp.c renamesse4.h
8891
$(CC) $(CFLAGS) $(OPTFLAGS) -DDORENAME -DENABLE_SSE4 -D__SSE2__ -D__SSE3__ -D__SSE4_1__ sleefsimdsp.c /c /Fosleefspsse4.obj
8992

9093
renamesse4.h : mkrename.exe
91-
./mkrename.exe sse4 2 4 > renamesse4.h
94+
./mkrename.exe 2 4 sse4 > renamesse4.h
9295

9396
##
9497

@@ -99,7 +102,7 @@ sleefspavx2128.obj : sleefsimdsp.c renameavx2128.h
99102
$(CC) $(CFLAGS) $(OPTFLAGS) -DDORENAME -DENABLE_AVX2128 /arch:AVX2 sleefsimdsp.c /c /Fosleefspavx2128.obj
100103

101104
renameavx2128.h : mkrename.exe
102-
./mkrename.exe avx2128 2 4 > renameavx2128.h
105+
./mkrename.exe 2 4 avx2128 > renameavx2128.h
103106

104107
##
105108

@@ -110,7 +113,7 @@ sleefspavx.obj : sleefsimdsp.c renameavx.h
110113
$(CC) $(CFLAGS) $(OPTFLAGS) -DDORENAME -DENABLE_AVX /arch:AVX sleefsimdsp.c /c /Fosleefspavx.obj
111114

112115
renameavx.h : mkrename.exe
113-
./mkrename.exe avx 4 8 > renameavx.h
116+
./mkrename.exe 4 8 avx > renameavx.h
114117

115118
#
116119

@@ -132,7 +135,7 @@ sleefspavx2.obj : sleefsimdsp.c renameavx2.h
132135
$(CC) $(CFLAGS) $(OPTFLAGS) -DDORENAME -DENABLE_AVX2 /arch:AVX2 sleefsimdsp.c /c /Fosleefspavx2.obj
133136

134137
renameavx2.h : mkrename.exe
135-
./mkrename.exe avx2 4 8 > renameavx2.h
138+
./mkrename.exe 4 8 avx2 > renameavx2.h
136139

137140
#
138141

@@ -147,6 +150,42 @@ renameavx2_gnuabi.h : mkrename_gnuabi.exe
147150

148151
##
149152

153+
# Dispatcher sources for the Visual C++ build.
# dispavx.c and dispsse.c are generated files: each starts as a copy of its
# hand-written template (*.c.org) and then has the output of mkdisp.exe
# appended to it.
# mkdisp.exe is compiled from mkdisp.c with no explicit output name, and the
# leftover mkdisp.obj is deleted afterwards.
# NOTE(review): the meaning of the mkdisp arguments (two numbers, three
# vector types, three backend suffixes) is defined in mkdisp.c, which is not
# visible here -- confirm against it.
dispavx.c : mkdisp.exe
154+
cp dispavx.c.org dispavx.c
155+
./mkdisp.exe 4 8 __m256d __m256 __m128i avx fma4 avx2 >> dispavx.c
156+
157+
dispsse.c : mkdisp.exe
158+
cp dispsse.c.org dispsse.c
159+
./mkdisp.exe 2 4 __m128d __m128 __m128i sse2 sse4 avx2128 >> dispsse.c
160+
161+
mkdisp.exe : mkdisp.c
162+
$(CC) $(CFLAGS) mkdisp.c
163+
rm -f mkdisp.obj
164+
165+
# Extra preprocessor flags for the AVX dispatcher object: -DENABLE_FMA4 and
# -DENABLE_AVX2 are appended when the corresponding build switch is defined.
# NOTE(review): ifdef is true for any non-empty value, so ENABLEFMA4=0 or
# ENABLEAVX2=0 would still enable the backend -- confirm that this is the
# intended contract for these switches.
DISPAVXOPT=
166+
ifdef ENABLEFMA4
167+
DISPAVXOPT+=-DENABLE_FMA4
168+
endif
169+
ifdef ENABLEAVX2
170+
DISPAVXOPT+=-DENABLE_AVX2
171+
endif
172+
173+
# Dispatcher objects for the Visual C++ build. Both depend on the generated
# dispatcher source and on sleef.h.
# dispavx.obj is compiled with $(DISPAVXOPT) and /arch:AVX; dispsse.obj is
# compiled with -D__SSE2__ only.
# NOTE(review): dispsse.obj does not receive $(DISPAVXOPT), although
# dispsse.c is generated with an avx2128 variant -- confirm whether
# ENABLE_AVX2 needs to reach the 128-bit dispatcher as well.
dispavx.obj : dispavx.c sleef.h
174+
$(CC) $(CFLAGS) $(SHAREDFLAGS) $(DISPAVXOPT) /arch:AVX dispavx.c /c /Fodispavx.obj
175+
176+
dispsse.obj : dispsse.c sleef.h
177+
$(CC) $(CFLAGS) $(SHAREDFLAGS) -D__SSE2__ dispsse.c /c /Fodispsse.obj
178+
179+
# Rename headers used by the dispatcher testers (Visual C++ build).
# mkrename.exe is called with only the two numeric arguments and no backend
# suffix, unlike the per-backend rename rules in this file.
# NOTE(review): these two rules run a $(FLOCK) command first, whereas the
# other rename rules visible in this Makefile.vc (renamesse2.h, renameavx.h,
# ...) do not. No definition of FLOCK is visible here; if it expands to
# nothing the shell would try to execute mkrename.c itself and the recipe
# would fail -- confirm that FLOCK is defined for this build, or drop the
# line to match the sibling rules.
renamedsp128.h : mkrename.exe
180+
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
181+
./mkrename.exe 2 4 > renamedsp128.h
182+
183+
renamedsp256.h : mkrename.exe
184+
$(FLOCK) mkrename.c -c 'echo Acquiring lock for mkrename'
185+
./mkrename.exe 4 8 > renamedsp256.h
186+
187+
##
188+
150189
.PHONY: clean
151190
clean :
152191
rm -f *~ *.obj sleef.h

0 commit comments

Comments
 (0)