Skip to content

Commit 38742d5

Browse files
authored
Merge pull request #2361 from wjc404/develop
Optimize AVX2 SGEMM & STRMM
2 parents fd2ff27 + bd4c032 commit 38742d5

File tree

5 files changed

+505
-14
lines changed

5 files changed

+505
-14
lines changed

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,3 +179,4 @@ In chronological order:
179 179   * [2019-11-12] AVX512 CGEMM & ZGEMM kernels
180 180   * [2019-12-23] optimize AVX2 CGEMM and ZGEMM
181 181   * [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
    182 + * [2020-01-07] optimize AVX2 SGEMM and STRMM

kernel/x86_64/KERNEL.HASWELL

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,11 @@ DAXPYKERNEL = daxpy.c
31 31   CAXPYKERNEL = caxpy.c
32 32   ZAXPYKERNEL = zaxpy.c
33 33
34    - STRMMKERNEL = sgemm_kernel_16x4_haswell.S
35    - SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
   34 + STRMMKERNEL = sgemm_kernel_8x4_haswell.c
   35 + SGEMMKERNEL = sgemm_kernel_8x4_haswell.c
36 36   SGEMM_BETA = sgemm_beta_skylakex.c
37    - SGEMMINCOPY = ../generic/gemm_ncopy_16.c
38    - SGEMMITCOPY = ../generic/gemm_tcopy_16.c
   37 + SGEMMINCOPY = ../generic/gemm_ncopy_8.c
   38 + SGEMMITCOPY = ../generic/gemm_tcopy_8.c
39 39   SGEMMONCOPY = sgemm_ncopy_4_skylakex.c
40 40   SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
41 41   SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)

kernel/x86_64/KERNEL.ZEN

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,10 @@ DAXPYKERNEL = daxpy.c
30 30   CAXPYKERNEL = caxpy.c
31 31   ZAXPYKERNEL = zaxpy.c
32 32
33    - STRMMKERNEL = sgemm_kernel_16x4_haswell.S
34    - SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
35    - SGEMMINCOPY = ../generic/gemm_ncopy_16.c
36    - SGEMMITCOPY = ../generic/gemm_tcopy_16.c
   33 + STRMMKERNEL = sgemm_kernel_8x4_haswell.c
   34 + SGEMMKERNEL = sgemm_kernel_8x4_haswell.c
   35 + SGEMMINCOPY = ../generic/gemm_ncopy_8.c
   36 + SGEMMITCOPY = ../generic/gemm_tcopy_8.c
37 37   SGEMMONCOPY = ../generic/gemm_ncopy_4.c
38 38   SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
39 39   SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)

0 commit comments

Comments
 (0)