Skip to content

Commit db9b611

Browse files
committed
Merge pull request #798 from wernsaar/develop
Optimized zgemv_n kernel for bulldozer, piledriver and steamroller
2 parents 711ecb8 + 2e6333f commit db9b611

File tree

5 files changed

+520
-4
lines changed

5 files changed

+520
-4
lines changed

common.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -332,12 +332,13 @@ typedef int blasint;
332332
#endif
333333
#endif
334334

335-
335+
/*
336336
#ifdef PILEDRIVER
337337
#ifndef YIELDING
338338
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
339339
#endif
340340
#endif
341+
*/
341342

342343
/*
343344
#ifdef STEAMROLLER

kernel/x86_64/KERNEL.PILEDRIVER

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ ZAXPYKERNEL = zaxpy.c
1111
SGEMVNKERNEL = sgemv_n_4.c
1212
SGEMVTKERNEL = sgemv_t_4.c
1313

14-
ZGEMVNKERNEL = zgemv_n_dup.S
14+
ZGEMVNKERNEL = zgemv_n_4.c
1515
ZGEMVTKERNEL = zgemv_t_4.c
1616

1717
DGEMVNKERNEL = dgemv_n_bulldozer.S

kernel/x86_64/KERNEL.STEAMROLLER

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ SGEMVTKERNEL = sgemv_t_4.c
2424
DGEMVNKERNEL = dgemv_n_4.c
2525
DGEMVTKERNEL = dgemv_t_4.c
2626

27-
ZGEMVNKERNEL = zgemv_n_dup.S
27+
ZGEMVNKERNEL = zgemv_n_4.c
2828
ZGEMVTKERNEL = zgemv_t_4.c
2929

3030
DCOPYKERNEL = dcopy_bulldozer.S

kernel/x86_64/zgemv_n_4.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434
#include "zgemv_n_microk_haswell-4.c"
3535
#elif defined(SANDYBRIDGE)
3636
#include "zgemv_n_microk_sandy-4.c"
37+
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
38+
#include "zgemv_n_microk_bulldozer-4.c"
3739
#endif
3840

39-
4041
#define NBMAX 1024
4142

4243
#ifndef HAVE_KERNEL_4x4

0 commit comments

Comments
 (0)