From ca22e28ca13fb1906b3a13194711969eaef9df85 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:25:44 -0700 Subject: [PATCH 01/35] Rename sgemm_direct_sme1.S to sgemm_direct_sme1_2VLx2VL.S --- kernel/arm64/{sgemm_direct_sme1.S => sgemm_direct_sme1_2VLx2VL.S} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename kernel/arm64/{sgemm_direct_sme1.S => sgemm_direct_sme1_2VLx2VL.S} (100%) diff --git a/kernel/arm64/sgemm_direct_sme1.S b/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S similarity index 100% rename from kernel/arm64/sgemm_direct_sme1.S rename to kernel/arm64/sgemm_direct_sme1_2VLx2VL.S From 22c6607db93e649026a95269bb1c7f2ee4ce008b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:30:10 -0700 Subject: [PATCH 02/35] Use ASMNAME to get symbol name from build system; leave x18 unused as reserved on MacOS --- kernel/arm64/sgemm_direct_sme1_2VLx2VL.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S b/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S index 8c0a173f3d..ebbd0cadd9 100644 --- a/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S +++ b/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S @@ -35,16 +35,17 @@ #define K_exit x15 //Exit condition for K loop #define M_cntr x16 //M loop counter #define C1 x17 //Constant1: N*(SVLs+1);SVLs-No. of 32-bit elements -#define C2 x18 //Constant2: N + SVLs -#define C3 x19 //Constant3: K*SVLs + SVLs -#define C4 x20 //Constant4: SVLs-2 -#define C5 x21 //Constant5: K*SVLs -#define C6 x22 //Constant6: N*SVLs +#define C2 x19 //Constant2: N + SVLs +#define C3 x20 //Constant3: K*SVLs + SVLs +#define C4 x21 //Constant4: SVLs-2 +#define C5 x22 //Constant5: K*SVLs +#define C6 x23 //Constant6: N*SVLs .text - .global sgemm_direct_sme1_2VLx2VL + .global ASMNAME - sgemm_direct_sme1_2VLx2VL: + ASMNAME: + //sgemm_direct_sme1_2VLx2VL: stp x19, x20, [sp, #-48]! stp x21, x22, [sp, #16] @@ -211,12 +212,12 @@ process_K_less_than_equal_2: addvl Cptr, Cptr, #2 addvl Bptr, Bptr, #1 whilelt p0.b, Bptr, N_exit //1st Tile predicate (N dimension) - b.first .N_Loop + b.mi .N_Loop add A_base, A_base, C5, lsl #3 //A_base += 2*K*SVLs FP32 elements add C_base, C_base, C6, lsl #3 //C_base += 2*N*SVLs FP32 elements incw M_cntr whilelt p2.s, M_cntr, M //1st Tile predicate (M dimension) - b.first .M_Loop + b.mi .M_Loop smstop From 89898fc499aaf51d6aa9df98a6ca7eece953eea9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:31:40 -0700 Subject: [PATCH 03/35] Add sgemm_direct_performant for switching between direct and regular kernels --- kernel/arm64/sgemm_direct_performant.c | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 kernel/arm64/sgemm_direct_performant.c diff --git a/kernel/arm64/sgemm_direct_performant.c b/kernel/arm64/sgemm_direct_performant.c new file mode 100644 index 0000000000..c5c42bccca --- /dev/null +++ b/kernel/arm64/sgemm_direct_performant.c @@ -0,0 +1,31 @@ +#include "common.h" +/* helper for the direct sgemm code written by Arjan van der Ven */ + + + + +int CNAME(BLASLONG M, BLASLONG N, BLASLONG K) +{ +if (M<3 || M%2==1) return 0; + unsigned long long mnk = M * N * K; + /* large matrixes -> not performant */ + if (mnk >= 28 * 512 * 512) + return 0; + + /* + * if the B matrix is not a nice multiple if 4 we get many unaligned accesses, + * and the regular sgemm copy/realignment of data pays off much quicker + */ + if ((N & 3) != 0 && (mnk >= 8 * 512 * 512)) + return 0; + +#ifdef SMP + /* if we can run multithreaded, the threading changes the based threshold */ + if (mnk > 2 * 350 * 512 && num_cpu_avail(3)> 1) + return 0; +#endif + + return 1; +} + + From 08a00326a4a8dca7642647b2dcbae8b401d4da42 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:35:41 -0700 Subject: [PATCH 04/35] Build symbol name from build system variables --- .../sgemm_direct_alpha_beta_arm64_sme1.c | 24 +++++++++---- kernel/arm64/sgemm_direct_arm64_sme1.c | 34 +++++++++++++------ 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c b/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c index d9de3ace3f..7a46e40f3a 100644 --- a/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c @@ -14,9 +14,17 @@ #include #endif +#if defined(DYNAMIC_ARCH) +#define COMBINE(a,b) a ## b +#define COMBINE2(a,b) COMBINE(a,b) +#define SME1_PREPROCESS_BASE sgemm_direct_sme1_preprocess +#define SME1_PREPROCESS COMBINE2(SME1_PREPROCESS_BASE,TS) +#else +#define SME1_PREPROCESS sgemm_direct_sme1_preprocess +#endif /* Function prototypes */ -extern void sgemm_direct_sme1_preprocess(uint64_t nbr, uint64_t nbc,\ - const float * restrict a, float * a_mod) __asm__("sgemm_direct_sme1_preprocess"); +extern void SME1_PREPROCESS(uint64_t nbr, uint64_t nbc,\ + const float * restrict a, float * a_mod); /* Function Definitions */ static uint64_t sve_cntw() { @@ -99,10 +107,11 @@ kernel_2x2(const float *A, const float *B, float *C, size_t shared_dim, svst1_hor_za32(/*tile*/2, /*slice*/i, pg_c_0, &C[i * ldc]); svst1_hor_za32(/*tile*/3, /*slice*/i, pg_c_1, &C[i * ldc + svl]); } +return; } __arm_new("za") __arm_locally_streaming -void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, const float* alpha,\ +static void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, const float* alpha,\ const float *ba, const float *restrict bb, const float* beta,\ float *restrict C) { @@ -125,6 +134,7 @@ void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, co // Block over row dimension of C for (; row_idx < num_rows; row_idx += row_batch) { row_batch = MIN(row_batch, num_rows - row_idx); + uint64_t col_idx = 0; uint64_t col_batch = 2*svl; @@ -143,7 +153,7 @@ void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, co #else void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, const float* alpha,\ const float *ba, const float *restrict bb, const float* beta,\ - float *restrict C){} + float *restrict C){fprintf(stderr,"empty sgemm_alpha_beta2x2 should never get called!!!\n");} #endif /*void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K,\ @@ -175,7 +185,8 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict /* Pre-process the left matrix to make it suitable for matrix sum of outer-product calculation */ - sgemm_direct_sme1_preprocess(M, K, A, A_mod); + + SME1_PREPROCESS(M, K, A, A_mod); asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", @@ -185,6 +196,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31"); /* Calculate C = alpha*A*B + beta*C */ + sgemm_direct_alpha_beta_sme1_2VLx2VL(M, K, N, &alpha, A_mod, B, &beta, R); free(A_mod); @@ -194,6 +206,6 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict A,\ BLASLONG strideA, float * __restrict B, BLASLONG strideB ,\ - float beta, float * __restrict R, BLASLONG strideR){} + float beta, float * __restrict R, BLASLONG strideR){fprintf(stderr,"empty sgemm_direct_alpha_beta should not be called!!!\n");} #endif diff --git a/kernel/arm64/sgemm_direct_arm64_sme1.c b/kernel/arm64/sgemm_direct_arm64_sme1.c index 13c337a13e..a214a0ff66 100644 --- a/kernel/arm64/sgemm_direct_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_arm64_sme1.c @@ -8,17 +8,28 @@ #include #include #if defined(HAVE_SME) - +#if defined(DYNAMIC_ARCH) +#define COMBINE(a,b) a ## b +#define COMBINE2(a,b) COMBINE(a,b) +#define SME1_PREPROCESS_BASE sgemm_direct_sme1_preprocess +#define SME1_PREPROCESS COMBINE2(SME1_PREPROCESS_BASE,TS) +#define SME1_DIRECT2X2_BASE sgemm_direct_sme1_2VLx2VL +#define SME1_DIRECT2X2 COMBINE2(SME1_DIRECT2X2_BASE,TS) +#else +#define SME1_PREPROCESS sgemm_direct_sme1_preprocess +#define SME1_DIRECT2X2 sgemm_direct_sme1_2VLx2VL +#endif /* Function prototypes */ -extern void sgemm_direct_sme1_preprocess(uint64_t nbr, uint64_t nbc,\ - const float * restrict a, float * a_mod) __asm__("sgemm_direct_sme1_preprocess"); -extern void sgemm_direct_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n,\ +extern void SME1_PREPROCESS(uint64_t nbr, uint64_t nbc,\ + const float * restrict a, float * a_mod) ; + +extern void SME1_DIRECT2X2(uint64_t m, uint64_t k, uint64_t n,\ const float * matLeft,\ const float * restrict matRight,\ - const float * restrict matResult) __asm__("sgemm_direct_sme1_2VLx2VL"); + const float * restrict matResult) ; /* Function Definitions */ -uint64_t sve_cntw() { +static uint64_t sve_cntw() { uint64_t cnt; asm volatile( "rdsvl %[res], #1\n" @@ -39,7 +50,6 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ uint64_t m_mod, vl_elms; vl_elms = sve_cntw(); - m_mod = ceil((double)M/(double)vl_elms) * vl_elms; float *A_mod = (float *) malloc(m_mod*K*sizeof(float)); @@ -57,10 +67,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ /* Pre-process the left matrix to make it suitable for matrix sum of outer-product calculation */ - sgemm_direct_sme1_preprocess(M, K, A, A_mod); + SME1_PREPROCESS(M, K, A, A_mod); /* Calculate C = A*B */ - sgemm_direct_sme1_2VLx2VL(M, K, N, A_mod, B, R); +fprintf(stderr,"sme direct calling 2x2\n"); + SME1_DIRECT2X2(M, K, N, A_mod, B, R); asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", @@ -75,6 +86,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ BLASLONG strideA, float * __restrict B, BLASLONG strideB ,\ - float * __restrict R, BLASLONG strideR){} - + float * __restrict R, BLASLONG strideR){ +fprintf(stderr,"EMPTY sgemm_kernel_direct should never be called \n"); +} #endif From 53d3bb50cc643c741442c74213ce6568396af33b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:37:50 -0700 Subject: [PATCH 05/35] Get symbol name from build system; change b.first to b.mi for AppleClang compatibility --- kernel/arm64/sgemm_direct_sme1_preprocess.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/arm64/sgemm_direct_sme1_preprocess.S b/kernel/arm64/sgemm_direct_sme1_preprocess.S index fa13620751..6c51b0bf63 100644 --- a/kernel/arm64/sgemm_direct_sme1_preprocess.S +++ b/kernel/arm64/sgemm_direct_sme1_preprocess.S @@ -37,9 +37,9 @@ #define C6 x15 //Constant6: 3*ncol .text - .global sgemm_direct_sme1_preprocess + .global ASMNAME //sgemm_direct_sme1_preprocess - sgemm_direct_sme1_preprocess: + ASMNAME: //sgemm_direct_sme1_preprocess: stp x19, x20, [sp, #-48]! stp x21, x22, [sp, #16] @@ -114,14 +114,14 @@ addvl mat_ptr0, mat_ptr0, #1 //mat_ptr0 += SVLb whilelt p8.b, mat_ptr0, inner_loop_exit - b.first .Loop_process + b.mi .Loop_process add mat_mod, mat_mod, C3, lsl #2 //mat_mod+=SVLs*nbc FP32 elements add mat, mat, C3, lsl #2 //mat+=SVLs*nbc FP32 elements incw outer_loop_cntr whilelt p0.s, outer_loop_cntr, nrow - b.first .M_Loop + b.mi .M_Loop smstop From 731f4dd686d9805ea4052e360dd811541a6e15c5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:39:35 -0700 Subject: [PATCH 06/35] Add VORTEXM4 settings --- kernel/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/Makefile b/kernel/Makefile index 84cd482a06..7b96244ece 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -29,6 +29,9 @@ ifdef TARGET_CORE ifeq ($(TARGET_CORE), ARMV9SME) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -DHAVE_SME -march=armv9-a+sve2+sme endif +ifeq ($(TARGET_CORE), VORTEXM4) + override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -DHAVE_SME -march=armv8.4-a+sme +endif ifeq ($(TARGET_CORE), SAPPHIRERAPIDS) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(CLANGVERSIONGTEQ12))) From e82bcd27403b788bdacaaec56da839e5ccec5806 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:41:13 -0700 Subject: [PATCH 07/35] Update ARM64 sgemm_direct object generation --- kernel/Makefile.L3 | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 79c88d76c7..b117bf1100 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -131,8 +131,12 @@ ifeq ($(ARCH), arm64) ifeq ($(TARGET_CORE), ARMV9SME) HAVE_SME = 1 endif +ifeq ($(TARGET_CORE), VORTEXM4) +HAVE_SME = 1 +endif SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c SGEMMDIRECTKERNEL_ALPHA_BETA = sgemm_direct_alpha_beta_arm64_sme1.c +SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c endif endif endif @@ -209,11 +213,12 @@ SKERNELOBJS += \ endif ifeq ($(ARCH), arm64) SKERNELOBJS += \ + sgemm_direct_performant$(TSUFFIX).$(SUFFIX) \ sgemm_direct$(TSUFFIX).$(SUFFIX) \ sgemm_direct_alpha_beta$(TSUFFIX).$(SUFFIX) ifdef HAVE_SME SKERNELOBJS += \ - sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) \ + sgemm_direct_sme1_2VLx2VL$(TSUFFIX).$(SUFFIX) \ sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) endif endif @@ -969,13 +974,15 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ endif ifeq ($(ARCH), arm64) +$(KDIR)sgemm_direct_performant$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTPERFORMANT) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)sgemm_direct_alpha_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL_ALPHA_BETA) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ ifdef HAVE_SME -$(KDIR)sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) : - $(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1.S -UDOUBLE -UCOMPLEX -o $@ +$(KDIR)sgemm_direct_sme1_2VLx2VL$(TSUFFIX).$(SUFFIX) : + $(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1_2VLx2VL.S -UDOUBLE -UCOMPLEX -o $@ $(KDIR)sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) : $(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1_preprocess.S -UDOUBLE -UCOMPLEX -o $@ endif From 0203657f409e90247f8469c149331fdf1575d30c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:42:32 -0700 Subject: [PATCH 08/35] Add sgemm_direct_performant for ARM64 --- kernel/setparam-ref.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index ccfbab8c11..36ebdd22ac 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -216,6 +216,7 @@ gotoblas_t TABLE_NAME = { #ifdef ARCH_ARM64 sgemm_directTS, sgemm_direct_alpha_betaTS, + sgemm_direct_performantTS, #endif sgemm_kernelTS, sgemm_betaTS, From de91afd2ae52ee0bf5a00680a7158e6c10cad6ae Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:44:21 -0700 Subject: [PATCH 09/35] Move SGEMM_DIRECT after the CBLAS parameter check and add sgemm_direct_performant for ARM64 --- interface/gemm.c | 56 +++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/interface/gemm.c b/interface/gemm.c index c5182c266a..62bc442467 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -424,30 +424,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS PRINT_DEBUG_CNAME; -#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16) -#if defined(ARCH_x86) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH)) -#if defined(DYNAMIC_ARCH) - if (support_avx512() ) -#endif - if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans && SGEMM_DIRECT_PERFORMANT(m,n,k)) { - SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc); - return; - } -#endif -#if defined(ARCH_ARM64) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH)) -#if defined(DYNAMIC_ARCH) - if (support_sme1()) -#endif - if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans) { - SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc); - return; - }else if (order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans) { - SGEMM_DIRECT_ALPHA_BETA(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); - return; - } -#endif -#endif - #ifndef COMPLEX args.alpha = (void *)α args.beta = (void *)β @@ -564,6 +540,35 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS return; } + + if ((args.m == 0) || (args.n == 0)) return; +#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16) +#if defined(ARCH_x86) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH)) +#if defined(DYNAMIC_ARCH) + if (support_avx512() ) +#endif + if (order == CblasRowMajor && beta == 0 && alpha == 1.0 && TransA == CblasNoTrans && TransB == CblasNoTrans && SGEMM_DIRECT_PERFORMANT(m,n,k)) { + SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc); + return; + } +#endif +#if defined(ARCH_ARM64) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH)) +#if defined(DYNAMIC_ARCH) +if (strcmp(gotoblas_corename(), "armv9sme") == 0 || strcmp(gotoblas_corename(), "vortexm4") == 0) +// if (support_sme1()) +#endif + if (order == CblasRowMajor && beta == 0 && alpha == 1.0 && TransA == CblasNoTrans && TransB == CblasNoTrans&& SGEMM_DIRECT_PERFORMANT(m,n,k)) { + SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc); + return; + } +else + if (order == CblasRowMajor && beta != 0. && (!(alpha==1.&&beta==1.)) && TransA == CblasNoTrans && TransB == CblasNoTrans&& SGEMM_DIRECT_PERFORMANT(m,n,k)) { + SGEMM_DIRECT_ALPHA_BETA(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + return; + } +#endif +#endif + #endif #if defined(__linux__) && defined(__x86_64__) && defined(BFLOAT16) @@ -582,6 +587,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS if ((args.m == 0) || (args.n == 0)) return; + + + #if 0 fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n", args.m, args.n, args.k, args.lda, args.ldb, args.ldc); From 202a7a0e2a84c0f435312cc0c885b7ac41418e31 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:45:40 -0700 Subject: [PATCH 10/35] Separate VORTEXM4 from VORTEX and ARMV9SME --- param.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/param.h b/param.h index d0ee246e83..5a807d7f6d 100644 --- a/param.h +++ b/param.h @@ -3353,7 +3353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(CORTEXA57) || defined(CORTEXX1) || \ defined(CORTEXA72) || defined(CORTEXA73) || \ - defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) + defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) || defined(VORTEXM4) #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3370,7 +3370,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /*FIXME: this should be using the cache size, but there is currently no easy way to query that on ARM. So if getarch counted more than 8 cores we simply assume the host is a big desktop or server with abundant cache rather than a phone or embedded device */ -#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1) +#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1) || defined(VORTEXM4) #define SGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_P 256 #define CGEMM_DEFAULT_P 256 @@ -3598,15 +3598,15 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #undef BGEMM_ALIGN_K #undef BGEMM_DEFAULT_UNROLL_M #undef BGEMM_DEFAULT_UNROLL_N -#define BGEMM_ALIGN_K 4 -#define BGEMM_DEFAULT_UNROLL_M 8 +#define BGEMM_ALIGN_K 8 #define BGEMM_DEFAULT_UNROLL_N 4 +#define BGEMM_DEFAULT_UNROLL_M 4 #undef SBGEMM_ALIGN_K #undef SBGEMM_DEFAULT_UNROLL_M #undef SBGEMM_DEFAULT_UNROLL_N -#define SBGEMM_ALIGN_K 4 -#define SBGEMM_DEFAULT_UNROLL_M 8 +#define SBGEMM_ALIGN_K 8 +#define SBGEMM_DEFAULT_UNROLL_M 4 #define SBGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_M 16 @@ -3842,7 +3842,7 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #endif /* ARMv8 */ -#if defined(ARMV9SME) /* ARMv9 SME */ +#if defined(ARMV9SME) || defined(VORTEXM4) /* ARMv9 SME */ #define USE_SGEMM_KERNEL_DIRECT 1 #endif /* ARMv9 SME */ From e76c39099a5fbdb94c9e6ceeaf0e324896ea4d43 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:47:17 -0700 Subject: [PATCH 11/35] Add sgemm_direct_performant for ARM64 --- common_param.h | 1 + 1 file changed, 1 insertion(+) diff --git a/common_param.h b/common_param.h index 0145f667a1..d8298a0057 100644 --- a/common_param.h +++ b/common_param.h @@ -257,6 +257,7 @@ int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *); #ifdef ARCH_ARM64 void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); void (*sgemm_direct_alpha_beta) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG); + int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K); #endif From ef0b883dffb8840799b722dd310fb1781859ef7b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:48:08 -0700 Subject: [PATCH 12/35] Add sgemm_direct_performant for ARM64 --- common_s.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_s.h b/common_s.h index 88b4732f51..513906ad50 100644 --- a/common_s.h +++ b/common_s.h @@ -217,7 +217,7 @@ #define SGEMM_DIRECT_PERFORMANT gotoblas -> sgemm_direct_performant #define SGEMM_DIRECT gotoblas -> sgemm_direct #elif ARCH_ARM64 -#define SGEMM_DIRECT_PERFORMANT sgemm_direct_performant +#define SGEMM_DIRECT_PERFORMANT gotoblas -> sgemm_direct_performant #define SGEMM_DIRECT gotoblas -> sgemm_direct #define SGEMM_DIRECT_ALPHA_BETA gotoblas -> sgemm_direct_alpha_beta #endif From ccfd0170fb48c3077621540f9eb2949ab6b38bf0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:50:13 -0700 Subject: [PATCH 13/35] Enable SME on MacOS and add VORTEXM4 to DYNAMIC_ARCH list --- Makefile.system | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.system b/Makefile.system index 3f1c48d23d..cd9213365e 100644 --- a/Makefile.system +++ b/Makefile.system @@ -427,7 +427,7 @@ ifndef MACOSX_DEPLOYMENT_TARGET ifeq ($(ARCH), arm64) export MACOSX_DEPLOYMENT_TARGET=11.0 export NO_SVE = 1 -export NO_SME = 1 +# export NO_SME = 1 else export MACOSX_DEPLOYMENT_TARGET=10.8 endif @@ -723,6 +723,7 @@ DYNAMIC_CORE += A64FX endif ifneq ($(NO_SME), 1) DYNAMIC_CORE += ARMV9SME +DYNAMIC_CORE += VORTEXM4 endif DYNAMIC_CORE += THUNDERX DYNAMIC_CORE += THUNDERX2T99 From b0a00fbd62e1d3d6c0be4a42201ddee3002df1ae Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:51:10 -0700 Subject: [PATCH 14/35] Add minimal compiler flags for VORTEXM4 --- Makefile.arm64 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile.arm64 b/Makefile.arm64 index b98933b77a..3d713b853a 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -303,6 +303,11 @@ FCOMMON_OPT += -march=armv8.3-a endif endif +ifeq ($(CORE), VORTEXM4) +CCOMMON_OPT += -march=armv8.4-a+sme +FCOMMON_OPT += -march=armv8.4-a+sme +endif + ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG))) ifeq ($(CORE), TSV110) CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 From 30970460b88593030dc90c2dea864d344ef25cf2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:52:05 -0700 Subject: [PATCH 15/35] Add VORTEXM4 target --- TargetList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/TargetList.txt b/TargetList.txt index b890c1440d..4903261e2c 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -111,6 +111,7 @@ THUNDERX2T99 TSV110 THUNDERX3T110 VORTEX +VORTEXM4 A64FX ARMV8SVE ARMV9SME From 4e2a8c18e5096f0775b0530dfb4cdbda9bf0c593 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:53:04 -0700 Subject: [PATCH 16/35] Split VORTEXM4 from VORTEX target due to SME support --- cpuid_arm64.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index fd6a9bd478..0f1a125745 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -82,6 +82,7 @@ size_t length64=sizeof(value64); #define CPU_AMPERE1 25 // Apple #define CPU_VORTEX 13 +#define CPU_VORTEXM4 26 // Fujitsu #define CPU_A64FX 15 // Phytium @@ -113,7 +114,8 @@ static char *cpuname[] = { "FT2000", "CORTEXA76", "NEOVERSEV2", - "AMPERE1" + "AMPERE1", + "VORTEXM4", }; static char *cpuname_lower[] = { @@ -143,7 +145,7 @@ static char *cpuname_lower[] = { "cortexa76", "neoversev2", "ampere1", - "ampere1a" + "vortexm4" }; static int cpulowperf=0; @@ -400,7 +402,7 @@ int detect(void) if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 if (value64 == 2271604202) return CPU_VORTEX; //A16/M3 - if (value64 == 1867590060) return CPU_VORTEX; //M4 + if (value64 == 1867590060) return CPU_VORTEXM4; //M4 #else #ifdef OS_WINDOWS HKEY reghandle; @@ -740,6 +742,27 @@ void get_cpuconfig(void) length64 = sizeof(value64); sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); printf("#define L2_SIZE %lld \n",value64); +#endif + printf("#define DTB_DEFAULT_ENTRIES 64 \n"); + printf("#define DTB_SIZE 4096 \n"); + break; + case CPU_VORTEXM4: + printf("#define VORTEXM4 \n"); + printf("#define HAVE_SME 1 \n"); +#ifdef __APPLE__ + length64 = sizeof(value64); + sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); + printf("#define L1_CODE_SIZE %lld \n",value64); + length64 = sizeof(value64); + sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); + printf("#define L1_CODE_LINESIZE %lld \n",value64); + printf("#define L1_DATA_LINESIZE %lld \n",value64); + length64 = sizeof(value64); + sysctlbyname("hw.l1dcachesize",&value64,&length64,NULL,0); + printf("#define L1_DATA_SIZE %lld \n",value64); + length64 = sizeof(value64); + sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); + printf("#define L2_SIZE %lld \n",value64); #endif printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_SIZE 4096 \n"); From 18f9582f3e955c0e98ac98d221de88cb18de2480 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 01:54:09 -0700 Subject: [PATCH 17/35] Add VORTEXM4 --- driver/others/dynamic_arm64.c | 40 +++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 70b51f6fce..0202a7368b 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -128,6 +128,12 @@ extern gotoblas_t gotoblas_ARMV9SME; #else #define gotoblas_ARMV9SME gotoblas_ARMV8 #endif +#ifdef DYN_VORTEXM4 +extern gotoblas_t gotoblas_VORTEXM4; +#else +#error "dont have vortexm4" +#define gotoblas_VORTEXM4 gotoblas_ARMV8 +#endif #ifdef DYN_CORTEXA55 extern gotoblas_t gotoblas_CORTEXA55; #else @@ -155,17 +161,22 @@ extern gotoblas_t gotoblas_NEOVERSEV1; extern gotoblas_t gotoblas_NEOVERSEN2; extern gotoblas_t gotoblas_ARMV8SVE; extern gotoblas_t gotoblas_A64FX; -#ifndef NO_SME -extern gotoblas_t gotoblas_ARMV9SME; -#else -#define gotoblas_ARMV9SME gotoblas_ARMV8SVE -#endif #else #define gotoblas_NEOVERSEV1 gotoblas_ARMV8 #define gotoblas_NEOVERSEN2 gotoblas_ARMV8 #define gotoblas_ARMV8SVE gotoblas_ARMV8 #define gotoblas_A64FX gotoblas_ARMV8 -#define gotoblas_ARMV9SME gotoblas_ARMV8 +#endif +#ifndef NO_SME +extern gotoblas_t gotoblas_ARMV9SME; +extern gotoblas_t gotoblas_VORTEXM4; +#else +#ifndef NO_SVE +#define gotoblas_ARMV9SME gotoblas_ARMV8SVE +#else +#define gotoblas_ARMV9SME gotoblas_NEOVERSEN1 +#endif +#define gotoblas_VORTEXM4 gotoblas_NEOVERSEN1 #endif extern gotoblas_t gotoblas_THUNDERX3T110; @@ -176,7 +187,7 @@ extern void openblas_warning(int verbose, const char * msg); #define FALLBACK_VERBOSE 1 #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" -#define NUM_CORETYPES 19 +#define NUM_CORETYPES 20 /* * In case asm/hwcap.h is outdated on the build system, make sure @@ -216,6 +227,7 @@ static char *corename[] = { "armv8sve", "a64fx", "armv9sme", + "vortexm4", "unknown" }; @@ -239,6 +251,7 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_ARMV8SVE) return corename[16]; if (gotoblas == &gotoblas_A64FX) return corename[17]; if (gotoblas == &gotoblas_ARMV9SME) return corename[18]; + if (gotoblas == &gotoblas_VORTEXM4) return corename[19]; return corename[NUM_CORETYPES]; } @@ -277,6 +290,7 @@ static gotoblas_t *force_coretype(char *coretype) { case 16: return (&gotoblas_ARMV8SVE); case 17: return (&gotoblas_A64FX); case 18: return (&gotoblas_ARMV9SME); + case 19: return (&gotoblas_VORTEXM4); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -288,11 +302,11 @@ static gotoblas_t *get_coretype(void) { char coremsg[128]; #if defined (OS_DARWIN) -//future #if !defined(NO_SME) -// if (support_sme1()) { -// return &gotoblas_ARMV9SME; -// } -// #endif +#if !defined(NO_SME) + if (support_sme1()) { + return &gotoblas_VORTEXM4; + } +#endif return &gotoblas_NEOVERSEN1; #endif @@ -463,7 +477,7 @@ static gotoblas_t *get_coretype(void) { } break; case 0x61: // Apple -//future if (support_sme1()) return &gotoblas_ARMV9SME; + if (support_sme1()) return &gotoblas_VORTEXM4; return &gotoblas_NEOVERSEN1; break; default: From ca542f319fe27f54bb1b7245a4a7025cbe2d0060 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 08:41:38 -0700 Subject: [PATCH 18/35] Add VORTEXM4 --- kernel/arm64/KERNEL.VORTEXM4 | 1 + 1 file changed, 1 insertion(+) create mode 100644 kernel/arm64/KERNEL.VORTEXM4 diff --git a/kernel/arm64/KERNEL.VORTEXM4 b/kernel/arm64/KERNEL.VORTEXM4 new file mode 100644 index 0000000000..46a34469c3 --- /dev/null +++ b/kernel/arm64/KERNEL.VORTEXM4 @@ -0,0 +1 @@ +include $(KERNELDIR)/KERNEL.NEOVERSEN1 From a4f5fec46e0b8b931512b05981c601293f5214e5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:32:07 -0700 Subject: [PATCH 19/35] Add compiler options for VORTEXM4 --- cmake/cc.cmake | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 952b2dd7ad..10ad9388d6 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -315,6 +315,16 @@ if (${CORE} STREQUAL ARMV9SME) endif () endif () +if (${CORE} STREQUAL VORTEXM4) + if (NOT DYNAMIC_ARCH) + if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE) + set (CCOMMON_OPT "${CCOMMON_OPT} -tp=host") + else () + set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sme") + endif () + endif () +endif () + if (${CORE} STREQUAL CORTEXA510) if (NOT DYNAMIC_ARCH) set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve") From c794d0a4ced73f8819ae14e54d864a6a4dcdd3e2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:33:24 -0700 Subject: [PATCH 20/35] Add VORTEXM4 --- cmake/prebuild.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index eea5eb5ede..8b0fda8178 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -1252,7 +1252,7 @@ endif () set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_N 4) set(SYMV_P 16) - elseif ("${TCORE}" STREQUAL "VORTEX") + elseif ("${TCORE}" STREQUAL "VORTEX" OR "${TCORE}" STREQUAL "VORTEXM4") file(APPEND ${TARGET_CONF_TEMP} "#define ARMV8\n" "#define L1_CODE_SIZE\t32768\n" From 4328c91e27fb75e337d23939e7ccbeaed20dd43a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:34:51 -0700 Subject: [PATCH 21/35] relax requirements in compiler SME capability check --- cmake/system_check.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index dd0dfab637..04e07cc135 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -142,7 +142,7 @@ endif() if (ARM64) if (NOT NO_SME) file(WRITE ${PROJECT_BINARY_DIR}/sme.c ".text \n.global sme_test\n\nsme_test:\nsmstart\nsmstop\nret\n") - execute_process(COMMAND ${CMAKE_C_COMPILER} -march=armv9-a+sve2+sme -c -v -o ${PROJECT_BINARY_DIR}/sme.o ${PROJECT_BINARY_DIR}/sme.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_SME) + execute_process(COMMAND ${CMAKE_C_COMPILER} -march=armv8.4-a+sme -c -v -o ${PROJECT_BINARY_DIR}/sme.o ${PROJECT_BINARY_DIR}/sme.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_SME) if (NO_SME EQUAL 1) set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_SME") endif() From 426b5f23ed0e50cfc5110b2e8529c0e4c8bd65cc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:35:36 -0700 Subject: [PATCH 22/35] Add compiler options for VORTEXM4 --- cmake/system.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index bf4c548b92..7ce5b82abb 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -361,6 +361,9 @@ if (${TARGET} STREQUAL NEOVERSEV1) if (${TARGET} STREQUAL ARMV9SME) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv9-a+sme -O3") endif() + if (${TARGET} STREQUAL VORTEXM4) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sme -O3") + endif() if (${TARGET} STREQUAL A64FX) if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx") From 0bc19a13353aa5e306786b118144086754508073 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:38:16 -0700 Subject: [PATCH 23/35] Update SME kernel details --- kernel/arm64/CMakeLists.txt | 1499 +++++++++++++++++++++++++++++++++++ 1 file changed, 1499 insertions(+) create mode 100644 kernel/arm64/CMakeLists.txt diff --git a/kernel/arm64/CMakeLists.txt b/kernel/arm64/CMakeLists.txt new file mode 100644 index 0000000000..b4ea62f0d5 --- /dev/null +++ b/kernel/arm64/CMakeLists.txt @@ -0,0 +1,1499 @@ +############################################################################### +# Copyright (c) 2025, The OpenBLAS Project +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# 3. Neither the name of the OpenBLAS project nor the names of +# its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +############################################################################### + +include_directories(${PROJECT_SOURCE_DIR}) + +# Makefile +function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) + set (OPENBLAS_SRC "") + set (ARCH_SUFFIX "") + include("${PROJECT_SOURCE_DIR}/cmake/kernel.cmake") + if (${DYNAMIC_ARCH}) + include("${PROJECT_SOURCE_DIR}/cmake/system.cmake") + endif () + ParseMakefileVars("${KERNELDIR}/KERNEL") + ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") + SetDefaultL1() + SetDefaultL2() + SetDefaultL3() + + set(KERNEL_INTERFACE common_level1.h common_level2.h common_level3.h) + if(NOT NO_LAPACK) + set(KERNEL_INTERFACE ${KERNEL_INTERFACE} common_lapack.h) + endif () + + if (${ADD_COMMONOBJS}) + if (X86) + if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") + GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) + else() + GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true) + endif() + endif () + + # don't use float type name mangling here + GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true "") + GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true "") + GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true "") + endif () + + # Run with no ARCH_SUFFIX for above + set (ARCH_SUFFIX "${TSUFFIX}") + # Makefile.L1 + foreach (float_type ${FLOAT_TYPES}) + # a bit of metaprogramming here to pull out the appropriate KERNEL var + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "BFLOAT16") + set (float_char "SB") + endif () + GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) + if (DEFINED ${float_char}MAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) + endif () + if (DEFINED ${float_char}MINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "USE_MIN" "min_k" false "" "" false ${float_type}) + endif () + GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) + if (DEFINED I${float_char}MAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) + endif () + if (DEFINED I${float_char}MINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "USE_MIN" "i*min_k" false "" "" false ${float_type}) + endif () + GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTMKERNEL}" "" "rotm_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SUMKERNEL}" "" "sum_k" false "" "" false ${float_type}) + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type}) + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) + endif () + + if (${float_type} STREQUAL "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) + endif() + if (${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) + endif() + + endforeach () + + #dsdot,sdsdot + GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") + + # sbdot + if (BUILD_BFLOAT16) + GenerateNamedObjects("${KERNELDIR}/${BSCALKERNEL}" "BGEMM" "scal_k" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBDOTKERNEL}" "SBDOT" "dot_k" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "SINGLE" "f16tos_k" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "DOUBLE" "bf16tod_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${TOBF16KERNEL}" "SINGLE" "stobf16_k" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${TOBF16KERNEL}" "DOUBLE" "dtobf16_k" false "" "" false "BFLOAT16") + endif() + + if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE") + if (DEFINED SMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE") + endif () + if (DEFINED SMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE") + endif () + if (DEFINED ISMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE") + endif () + if (DEFINED ISMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SROTMKERNEL}" "" "rotm_k" false "" "" false "SINGLE") + endif () + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") + if (DEFINED DMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") + endif () + if (DEFINED DMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") + endif () + + # Makefile.L2 + GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) + GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) + foreach (float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) + + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) + endif () + endforeach () + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") + endif () + if (BUILD_COMPLEX AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + endif () + if (BUILD_BFLOAT16) + GenerateNamedObjects("${KERNELDIR}/${BGEMVNKERNEL}" "BGEMM" "gemv_n" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${BGEMVTKERNEL}" "BGEMM" "gemv_t" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMVNKERNEL}" "" "gemv_n" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMVTKERNEL}" "" "gemv_t" false "" "" false "BFLOAT16") + endif () + # Makefile.L3 + set(USE_TRMM false) + string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE) + if (ARM OR ARM64 OR RISCV64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE) OR (UC_TARGET_CORE MATCHES SAPPHIRERAPIDS)) + set(USE_TRMM true) + endif () + if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10)) + set(USE_TRMM true) + endif () + set(USE_DIRECT_SGEMM false) + if (X86_64 OR ARM64) + set(USE_DIRECT_SGEMM true) + endif() + if (UC_TARGET_CORE MATCHES ARMV9SME OR UC_TARGET_CORE MATCHES VORTEXM4) + set (HAVE_SME true) + endif () + + if (USE_DIRECT_SGEMM) + # if (NOT DEFINED SGEMMDIRECTKERNEL) + if (X86_64) + set (SGEMMDIRECTKERNEL sgemm_direct_skylakex.c) + set (SGEMMDIRECTPERFORMANT sgemm_direct_performant.c) + # endif() + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) + elseif (ARM64) + set (SGEMMDIRECTPERFORMANT sgemm_direct_performant.c) + set (SGEMMDIRECTKERNEL sgemm_direct_arm64_sme1.c) + set (SGEMMDIRECTKERNEL_ALPHA_BETA sgemm_direct_alpha_beta_arm64_sme1.c) + set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1_2VLx2VL.S) + set (SGEMMDIRECTPREKERNEL sgemm_direct_sme1_preprocess.S) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL_ALPHA_BETA}" "" "gemm_direct_alpha_beta" false "" "" false SINGLE) + if (HAVE_SME) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1_2VLx2VL" false "" "" false SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPREKERNEL}" "" "gemm_direct_sme1_preprocess" false "" "" false SINGLE) + endif () + endif () + endif() + + foreach (float_type SINGLE DOUBLE) + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) + endforeach() + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") + if (DGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") + GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") + GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") + if (SMALL_MATRIX_OPT) + if (NOT DEFINED DGEMM_SMALL_M_PERMIT) + set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_NN) + set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_NT) + set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_TN) + set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_TT) + set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_NN) + set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_NT) + set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_TN) + set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_TT) + set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE") + endif () + + endif () + if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") + if (SGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE") + endif () + + if (BUILD_BFLOAT16) + if (BGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${BGEMMINCOPY}" "BGEMM" "${BGEMMINCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + if (BGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${BGEMMITCOPY}" "BGEMM" "${BGEMMITCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + if (BGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${BGEMMONCOPY}" "BGEMM" "${BGEMMONCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + if (BGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${BGEMMOTCOPY}" "BGEMM" "${BGEMMOTCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + GenerateNamedObjects("${KERNELDIR}/${BGEMMKERNEL}" "BGEMM" "gemm_kernel" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${BGEMM_BETA}" "BGEMM" "gemm_beta" false "" "" false "BFLOAT16") + if (SBGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SBGEMMINCOPY}" "" "${SBGEMMINCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + if (SBGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SBGEMMITCOPY}" "" "${SBGEMMITCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + if (SBGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SBGEMMONCOPY}" "" "${SBGEMMONCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + if (SBGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SBGEMMOTCOPY}" "" "${SBGEMMOTCOPYOBJ}" false "" "" true "BFLOAT16") + endif () + GenerateNamedObjects("${KERNELDIR}/${SBGEMMKERNEL}" "" "gemm_kernel" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_BETA}" "" "gemm_beta" false "" "" false "BFLOAT16") + endif () + if (BUILD_HFLOAT16) + if (SHGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SHGEMMINCOPY}" "" "${SHGEMMINCOPYOBJ}" false "" "" true "HFLOAT16") + endif () + if (SHGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SHGEMMITCOPY}" "" "${SHGEMMITCOPYOBJ}" false "" "" true "HFLOAT16") + endif () + if (SHGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SHGEMMONCOPY}" "" "${SHGEMMONCOPYOBJ}" false "" "" true "HFLOAT16") + endif () + if (SHGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SHGEMMOTCOPY}" "" "${SHGEMMOTCOPYOBJ}" false "" "" true "HFLOAT16") + endif () + GenerateNamedObjects("${KERNELDIR}/${SHGEMMKERNEL}" "" "gemm_kernel" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_BETA}" "" "gemm_beta" false "" "" false "HFLOAT16") + endif () + foreach (float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_char}GEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) + endif () + + if (${float_char}GEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type}) + endif () + + if (${float_char}GEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type}) + endif () + + if (${float_char}GEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type}) + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type}) + + if (USE_TRMM) + set(TRMM_KERNEL "${${float_char}TRMMKERNEL}") + else () + set(TRMM_KERNEL "${${float_char}GEMMKERNEL}") + endif () + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + + # just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. CN NC NN) that I don't really want to work into GenerateCombinationObjects + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) + + + #hemm +if (NOT DEFINED ${float_char}HEMMUTCOPY_M) + set(HEMMUTCOPY_M "generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c") + set(HEMMLTCOPY_M "generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(HEMMUTCOPY_M "${KERNELDIR}/${${float_char}HEMMUTCOPY_M}") + set(HEMMLTCOPY_M "${KERNELDIR}/${${float_char}HEMMLTCOPY_M}") +endif() + GenerateNamedObjects(${HEMMUTCOPY_M} "" "hemm_iutcopy" false "" "" false ${float_type}) + GenerateNamedObjects(${HEMMLTCOPY_M} "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) + + # symm for c and z +if (NOT DEFINED ${float_char}SYMMUCOPY_M) + set(SYMMUCOPY_M "generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c") + set(SYMMLCOPY_M "generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(SYMMUCOPY_M "${KERNELDIR}/${${float_char}SYMMUCOPY_M}") + set(SYMMLCOPY_M "${KERNELDIR}/${${float_char}SYMMLCOPY_M}") +endif() + GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects(${SYMMUCOPY_M} "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects(${SYMMLCOPY_M} "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + +if (NOT DEFINED ${float_char}TRMMUNCOPY_M) + set(TRMMUNCOPY_M "generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRMMLNCOPY_M "generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRMMUTCOPY_M "generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRMMLTCOPY_M "generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(TRMMUNCOPY_M "${KERNELDIR}/${${float_char}TRMMUNCOPY_M}") + set(TRMMLNCOPY_M "${KERNELDIR}/${${float_char}TRMMLNCOPY_M}") + set(TRMMUTCOPY_M "${KERNELDIR}/${${float_char}TRMMUTCOPY_M}") + set(TRMMLTCOPY_M "${KERNELDIR}/${${float_char}TRMMLTCOPY_M}") +endif () + GenerateNamedObjects(${TRMMUNCOPY_M} "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMUNCOPY_M} "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRMMUTCOPY_M} "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMUTCOPY_M} "" "trmm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + +if (NOT DEFINED ZTRSMCOPYLN_M) + set(ZTRSMUNCOPY_M "generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c") + set(ZTRSMLNCOPY_M "generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c") + set(ZTRSMUTCOPY_M "generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c") + set(ZTRSMLTCOPY_M "generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(ZTRSMUNCOPY_M "${KERNELDIR}/${ZTRSMCOPYUN_M}") + set(ZTRSMLNCOPY_M "${KERNELDIR}/${ZTRSMCOPYLN_M}") + set(ZTRSMUTCOPY_M "${KERNELDIR}/${ZTRSMCOPYUT_M}") + set(ZTRSMLTCOPY_M "${KERNELDIR}/${ZTRSMCOPYLT_M}") +endif () + GenerateNamedObjects(${ZTRSMUNCOPY_M} "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${ZTRSMUNCOPY_M} "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${ZTRSMLNCOPY_M} "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${ZTRSMLNCOPY_M} "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${ZTRSMUTCOPY_M} "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${ZTRSMUTCOPY_M} "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${ZTRSMLTCOPY_M} "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${ZTRSMLTCOPY_M} "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + + #gemm3m + if (USE_GEMM3M) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM3MKERNEL}" "NN" "gemm3m_kernel" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA" "gemm3m_oncopyb" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;REAL_ONLY" "gemm3m_oncopyr" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;IMAGE_ONLY" "gemm3m_oncopyi" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA" "gemm3m_otcopyb" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;REAL_ONLY" "gemm3m_otcopyr" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;IMAGE_ONLY" "gemm3m_otcopyi" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY" "gemm3m_incopyb" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;REAL_ONLY" "gemm3m_incopyr" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;IMAGE_ONLY" "gemm3m_incopyi" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY" "gemm3m_itcopyb" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;REAL_ONLY" "gemm3m_itcopyr" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;IMAGE_ONLY" "gemm3m_itcopyi" false "" "" false ${float_type}) + +#hemm3m and symm3m + foreach(name symm3m hemm3m) + GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA" "${name}_oucopyb.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;REAL_ONLY" "${name}_oucopyr.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;IMAGE_ONLY" "${name}_oucopyi.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA" "${name}_olcopyb.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;REAL_ONLY" "${name}_olcopyr.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;IMAGE_ONLY" "${name}_olcopyi.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "" "${name}_iucopyb.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "REAL_ONLY" "${name}_iucopyr.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "IMAGE_ONLY" "${name}_iucopyi.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "" "${name}_ilcopyb.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "REAL_ONLY" "${name}_ilcopyr.c" false "" "" false ${float_type}) + GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "IMAGE_ONLY" "${name}_ilcopyi.c" false "" "" false ${float_type}) + + endforeach () + + endif() + + else () #For real + GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) + + # symm for s and d +if (NOT DEFINED ${float_char}SYMMUCOPY_M) + set(SYMMUCOPY_M "generic/symm_ucopy_${${float_char}GEMM_UNROLL_M}.c") + set(SYMMLCOPY_M "generic/symm_lcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(SYMMUCOPY_M "${KERNELDIR}/${${float_char}SYMMUCOPY_M}") + set(SYMMLCOPY_M "${KERNELDIR}/${${float_char}SYMMLCOPY_M}") +endif() + GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects(${SYMMUCOPY_M} "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects(${SYMMLCOPY_M} "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. + # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. + +if (NOT DEFINED ${float_char}TRMMUNCOPY_M) + set(TRMMUNCOPY_M "generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRMMLNCOPY_M "generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRMMUTCOPY_M "generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRMMLTCOPY_M "generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(TRMMUNCOPY_M "${KERNELDIR}/${${float_char}TRMMUNCOPY_M}") + set(TRMMLNCOPY_M "${KERNELDIR}/${${float_char}TRMMLNCOPY_M}") + set(TRMMUTCOPY_M "${KERNELDIR}/${${float_char}TRMMUTCOPY_M}") + set(TRMMLTCOPY_M "${KERNELDIR}/${${float_char}TRMMLTCOPY_M}") +endif () + GenerateNamedObjects(${TRMMUNCOPY_M} "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMUNCOPY_M} "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRMMUTCOPY_M} "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMUTCOPY_M} "" "trmm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + +if (NOT DEFINED TRSMCOPYLN_M) + set(TRSMUNCOPY_M "generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRSMLNCOPY_M "generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRSMUTCOPY_M "generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c") + set(TRSMLTCOPY_M "generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") +else () + set(TRSMUNCOPY_M "${KERNELDIR}/${TRSMCOPYUN_M}") + set(TRSMLNCOPY_M "${KERNELDIR}/${TRSMCOPYLN_M}") + set(TRSMUTCOPY_M "${KERNELDIR}/${TRSMCOPYUT_M}") + set(TRSMLTCOPY_M "${KERNELDIR}/${TRSMCOPYLT_M}") +endif () + GenerateNamedObjects(${TRSMUNCOPY_M} "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRSMUNCOPY_M} "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRSMLNCOPY_M} "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRSMLNCOPY_M} "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRSMUTCOPY_M} "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRSMUTCOPY_M} "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects(${TRSMLTCOPY_M} "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects(${TRSMLTCOPY_M} "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) + + if (NOT DEFINED ${float_char}GEMM_SMALL_M_PERMIT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit.c) + else () + set(${float_char}GEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_NN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn.c) + else () + set(${float_char}GEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_NT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt.c) + else () + set(${float_char}GEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_TN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn.c) + else () + set(${float_char}GEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_TT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt.c) + else () + set(${float_char}GEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_NN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn.c) + else () + set(${float_char}GEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_NT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt.c) + else () + set(${float_char}GEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_TN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn.c) + else () + set(${float_char}GEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + endif () + if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_TT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}GEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt.c) + else () + set(${float_char}GEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + endif () + + if (SMALL_MATRIX_OPT) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false ${float_type}) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false ${float_type}) + + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false ${float_type}) + endif () + if (BUILD_BFLOAT16) + if (NOT DEFINED SBGEMM_SMALL_M_PERMIT) + set(SBGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_NN) + set(SBGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_NT) + set(SBGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_TN) + set(SBGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_TT) + set(SBGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_B0_NN) + set(SBGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_B0_NT) + set(SBGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_B0_TN) + set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) + set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "BFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "BFLOAT16") + endif () + + if (BUILD_HFLOAT16) + if (NOT DEFINED SHGEMM_SMALL_M_PERMIT) + set(SHGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_NN) + set(SHGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_NT) + set(SHGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_TN) + set(SHGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_TT) + set(SHGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_B0_NN) + set(SHGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_B0_NT) + set(SHGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_B0_TN) + set(SHGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED SHGEMM_SMALL_K_B0_TT) + set(SHGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "HFLOAT16") + GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "HFLOAT16") + endif () + endif () + + if (NOT DEFINED ${float_char}OMATCOPY_CN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CN ../arm/zomatcopy_cn.c) + else () + set(${float_char}OMATCOPY_CN ../arm/omatcopy_cn.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RN ../arm/zomatcopy_rn.c) + else () + set(${float_char}OMATCOPY_RN ../arm/omatcopy_rn.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CT ../arm/zomatcopy_ct.c) + else () + set(${float_char}OMATCOPY_CT ../arm/omatcopy_ct.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RT ../arm/zomatcopy_rt.c) + else () + set(${float_char}OMATCOPY_RT ../arm/omatcopy_rt.c) + endif () + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) + + if (NOT DEFINED ${float_char}OMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) + endif() + + #imatcopy + if (NOT DEFINED ${float_char}IMATCOPY_CN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CN ../generic/zimatcopy_cn.c) + else () + set(${float_char}IMATCOPY_CN ../generic/imatcopy_cn.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_RN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RN ../generic/zimatcopy_rn.c) + else () + set(${float_char}IMATCOPY_RN ../generic/imatcopy_rn.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_CT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CT ../generic/zimatcopy_ct.c) + else () + set(${float_char}IMATCOPY_CT ../generic/imatcopy_ct.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_RT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RT ../generic/zimatcopy_rt.c) + else () + set(${float_char}IMATCOPY_RT ../generic/imatcopy_rt.c) + endif () + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CN}" "" "imatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RN}" "ROWM" "imatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CT}" "" "imatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RT}" "ROWM" "imatcopy_k_rt" false "" "" false ${float_type}) + + + if (NOT DEFINED ${float_char}IMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CNC ../generic/zimatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RNC ../generic/zimatcopy_rnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CTC ../generic/zimatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RTC ../generic/zimatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CNC}" "CONJ" "imatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RNC}" "CONJ;ROWM" "imatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CTC}" "CONJ" "imatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RTC}" "CONJ;ROWM" "imatcopy_k_rtc" false "" "" false ${float_type}) + endif() + + #geadd + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) + endforeach () + + if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false "SINGLE") + + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false "SINGLE") + + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false "SINGLE") + + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE") + if (SMALL_MATRIX_OPT) + if (NOT DEFINED SGEMM_SMALL_M_PERMIT) + set(SGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_NN) + set(SGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_NT) + set(SGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_TN) + set(SGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_TT) + set(SGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_B0_NN) + set(SGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_B0_NT) + set(SGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_B0_TN) + set(SGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED SGEMM_SMALL_K_B0_TT) + set(SGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "SINGLE") + endif () + + endif () + + # Makefile.LA + if(NOT NO_LAPACK) + foreach (float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "BFLOAT16") + set (float_char "SB") + endif () + if (NOT DEFINED ${float_char}NEG_TCOPY) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") + set(${float_char}NEG_TCOPY ../generic/zneg_tcopy_${${float_char}GEMM_UNROLL_M}.c) + else () + set(${float_char}NEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c) + endif () + endif () + + if (NOT DEFINED ${float_char}LASWP_NCOPY) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") + set(${float_char}LASWP_NCOPY ../generic/zlaswp_ncopy_${${float_char}GEMM_UNROLL_N}.c) + else () + set(${float_char}LASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c) + endif () + endif () + GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}" "" "neg_tcopy" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}" "" "laswp_ncopy" false "" "" false ${float_type}) + endforeach() + if (BUILD_COMPLEX AND NOT BUILD_SINGLE) + if (NOT DEFINED SNEG_TCOPY) + set(SNEG_TCOPY ../generic/neg_tcopy_${SGEMM_UNROLL_M}.c) + endif () + + if (NOT DEFINED SLASWP_NCOPY) + set(SLASWP_NCOPY ../generic/laswp_ncopy_${SGEMM_UNROLL_N}.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE") + endif() + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + if (NOT DEFINED DNEG_TCOPY) + set(DNEG_TCOPY ../generic/neg_tcopy_${DGEMM_UNROLL_M}.c) + endif () + + if (NOT DEFINED DLASWP_NCOPY) + set(DLASWP_NCOPY ../generic/laswp_ncopy_${DGEMM_UNROLL_N}.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}" "" "neg_tcopy" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "DOUBLE") + endif() + endif() + + if (${DYNAMIC_ARCH}) + set(SETPARAM_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}) + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/setparam-ref.c SETPARAM_REF_CONTENTS) + string(REPLACE "TS" "${TSUFFIX}" SETPARAM_REF_CONTENTS_NEW "${SETPARAM_REF_CONTENTS}") + file(WRITE ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.tmp "${SETPARAM_REF_CONTENTS_NEW}") + configure_file(${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.tmp ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.c COPYONLY) + set(OPENBLAS_SRC ${OPENBLAS_SRC} ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.c) + file(REMOVE ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.tmp) + + set(KERNEL_TSUFFIX_CONTENTS "") + foreach (KERNEL_INTERFACE_H ${KERNEL_INTERFACE}) + file(READ ${PROJECT_SOURCE_DIR}/${KERNEL_INTERFACE_H} KERNEL_INTERFACE_H_CONTENTS) + string(REGEX REPLACE " *\\(" "${TSUFFIX}(" KERNEL_INTERFACE_H_CONTENTS_NEW "${KERNEL_INTERFACE_H_CONTENTS}") + set(KERNEL_TSUFFIX_CONTENTS "${KERNEL_TSUFFIX_CONTENTS}\n${KERNEL_INTERFACE_H_CONTENTS_NEW}") + endforeach() + file(WRITE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp "${KERNEL_TSUFFIX_CONTENTS}") + configure_file(${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.h COPYONLY) + file(REMOVE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp) + + foreach (float_type ${FLOAT_TYPES}) + # a bit of metaprogramming here to pull out the appropriate KERNEL var + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "BFLOAT16") + set (float_char "SB") + endif () + GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type}) + GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) + endforeach () + + if (BUILD_COMPLEX AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") + endif () + if (BUILD_DOUBLE AND NOT BUILD_SINGLE) + GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "SINGLE") + + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "SINGLE") + + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "SINGLE") + + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "SINGLE") + + if (SGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + endif () + + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") + if (DEFINED DMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") + endif () + if (DEFINED DMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") + + GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") + if (DGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") + + GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") + GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") + if (SMALL_MATRIX_OPT) + if (NOT DEFINED DGEMM_SMALL_M_PERMIT) + set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_NN) + set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_NT) + set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_TN) + set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_TT) + set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_NN) + set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_NT) + set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_TN) + set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) + endif () + if (NOT DEFINED DGEMM_SMALL_K_B0_TT) + set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE") + endif () + endif () + if (BUILD_COMPLEX16 AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") + endif() + if (BUILD_COMPLEX160 AND NOT BUILD_COMPLEX) + GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX") + if (DEFINED CMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX") + endif () + if (DEFINED CMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX") + endif () + GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX") + if (DEFINED ICMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX") + endif () + if (DEFINED ICMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX") + endif () + GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX") + if (CGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX") + endif () + + if (CGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX") + endif () + + if (CGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX") + endif () + + if (CGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX") + endif () + GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX") + if (SMALL_MATRIX_OPT) + if (NOT DEFINED CGEMM_SMALL_M_PERMIT) + set(CGEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit) + endif () + if (NOT DEFINED CGEMM_SMALL_K_NN) + set(CGEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn) + endif () + if (NOT DEFINED CGEMM_SMALL_K_NT) + set(CGEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt) + endif () + if (NOT DEFINED CGEMM_SMALL_K_TN) + set(CGEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn) + endif () + if (NOT DEFINED CGEMM_SMALL_K_TT) + set(CGEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt) + endif () + if (NOT DEFINED CGEMM_SMALL_K_B0_NN) + set(CGEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn) + endif () + if (NOT DEFINED CGEMM_SMALL_K_B0_NT) + set(CGEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt) + endif () + if (NOT DEFINED CGEMM_SMALL_K_B0_TN) + set(CGEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn) + endif () + if (NOT DEFINED CGEMM_SMALL_K_B0_TT) + set(CGEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt) + endif () + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_M_PERMIT}.c" "" "gemm_small_matrix_permit" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NN" "gemm_small_kernel_nn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NR" "gemm_small_kernel_nr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RN" "gemm_small_kernel_rn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RR" "gemm_small_kernel_rr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NT" "gemm_small_kernel_nt" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NC" "gemm_small_kernel_nc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RT" "gemm_small_kernel_rt" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RC" "gemm_small_kernel_rc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TN" "gemm_small_kernel_tn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TR" "gemm_small_kernel_tr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CN" "gemm_small_kernel_cn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CR" "gemm_small_kernel_cr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TT" "gemm_small_kernel_tt" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TC" "gemm_small_kernel_tc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CT" "gemm_small_kernel_ct" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CC" "gemm_small_kernel_cc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "COMPLEX") + endif () + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX") + + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX") + + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX") + + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX") + endif () + endif () + + add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) + set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") + get_target_property(KERNEL_INCLUDE_DIRECTORIES kernel${TSUFFIX} INCLUDE_DIRECTORIES) + set_target_properties(kernel${TSUFFIX} PROPERTIES INCLUDE_DIRECTORIES "${KERNEL_INCLUDE_DIRECTORIES};${TARGET_CONF_DIR}") + if (USE_GEMM3M) + target_compile_definitions(kernel${TSUFFIX} PRIVATE USE_GEMM3M) + endif() + if (USE_OPENMP) + target_link_libraries(kernel${TSUFFIX} OpenMP::OpenMP_C) + endif() +endfunction () + + +set(ADD_COMMONOBJS 1) +if (${DYNAMIC_ARCH}) + foreach(TARGET_CORE ${DYNAMIC_CORE}) + set(BUILD_KERNEL 1) + set(KDIR "") + set(TSUFFIX "_${TARGET_CORE}") + set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") + build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") + set(ADD_COMMONOBJS 0) + endforeach() +else () + set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}) + set(TARGET_CORE ${CORE}) + set(KDIR "") + set(TSUFFIX "") + set(KERNEL_DEFINITIONS "") + build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") +endif () + From bf98e448eb36d6f524cb471ffce6d7f69c8e1467 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:43:08 -0700 Subject: [PATCH 24/35] Add VORTEXM4 to DYNAMIC_ARCH list --- cmake/arch.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index d9a7aafd62..62c9fe96cc 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -39,14 +39,14 @@ if (DYNAMIC_ARCH) set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX) endif () if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 14) # SME ACLE supported in GCC >= 14 - set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME) + set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME VORTEXM4) endif() elseif (${CMAKE_C_COMPILER_ID} MATCHES "Clang") if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 11) # SVE ACLE supported in LLVM >= 11 set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX) endif () if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19) # SME ACLE supported in LLVM >= 19 - set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME) + set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME VORTEXM4) endif() endif () if (DYNAMIC_LIST) From 4609732e69c00c72b66a966e30f36763b32b6e11 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 18 Aug 2025 14:54:20 -0700 Subject: [PATCH 25/35] Relax version number requirement for AppleClang --- cmake/arch.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 62c9fe96cc..a312ef814e 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -45,7 +45,7 @@ if (DYNAMIC_ARCH) if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 11) # SVE ACLE supported in LLVM >= 11 set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX) endif () - if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19) # SME ACLE supported in LLVM >= 19 + if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19 OR (${CMAKE_C_COMPILER_ID} MATCHES AppleClang AND ${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 17) ) # SME ACLE supported in LLVM >= 19 and AppleClang >= 17 set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME VORTEXM4) endif() endif () From 05dbb54362f77cdcd6320947fa30b63a0cfd0cac Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 19 Aug 2025 05:12:09 -0700 Subject: [PATCH 26/35] Delete misplaced file --- kernel/arm64/CMakeLists.txt | 1499 ----------------------------------- 1 file changed, 1499 deletions(-) delete mode 100644 kernel/arm64/CMakeLists.txt diff --git a/kernel/arm64/CMakeLists.txt b/kernel/arm64/CMakeLists.txt deleted file mode 100644 index b4ea62f0d5..0000000000 --- a/kernel/arm64/CMakeLists.txt +++ /dev/null @@ -1,1499 +0,0 @@ -############################################################################### -# Copyright (c) 2025, The OpenBLAS Project -# All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# 3. Neither the name of the OpenBLAS project nor the names of -# its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -############################################################################### - -include_directories(${PROJECT_SOURCE_DIR}) - -# Makefile -function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) - set (OPENBLAS_SRC "") - set (ARCH_SUFFIX "") - include("${PROJECT_SOURCE_DIR}/cmake/kernel.cmake") - if (${DYNAMIC_ARCH}) - include("${PROJECT_SOURCE_DIR}/cmake/system.cmake") - endif () - ParseMakefileVars("${KERNELDIR}/KERNEL") - ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") - SetDefaultL1() - SetDefaultL2() - SetDefaultL3() - - set(KERNEL_INTERFACE common_level1.h common_level2.h common_level3.h) - if(NOT NO_LAPACK) - set(KERNEL_INTERFACE ${KERNEL_INTERFACE} common_lapack.h) - endif () - - if (${ADD_COMMONOBJS}) - if (X86) - if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") - GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) - else() - GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true) - endif() - endif () - - # don't use float type name mangling here - GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true "") - GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true "") - GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true "") - endif () - - # Run with no ARCH_SUFFIX for above - set (ARCH_SUFFIX "${TSUFFIX}") - # Makefile.L1 - foreach (float_type ${FLOAT_TYPES}) - # a bit of metaprogramming here to pull out the appropriate KERNEL var - string(SUBSTRING ${float_type} 0 1 float_char) - if (${float_type} STREQUAL "BFLOAT16") - set (float_char "SB") - endif () - GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) - if (DEFINED ${float_char}MAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) - endif () - if (DEFINED ${float_char}MINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "USE_MIN" "min_k" false "" "" false ${float_type}) - endif () - GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) - if (DEFINED I${float_char}MAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) - endif () - if (DEFINED I${float_char}MINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "USE_MIN" "i*min_k" false "" "" false ${float_type}) - endif () - GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTMKERNEL}" "" "rotm_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}SUMKERNEL}" "" "sum_k" false "" "" false ${float_type}) - - if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type}) - else () - GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) - endif () - - if (${float_type} STREQUAL "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) - endif() - if (${float_type} STREQUAL "ZCOMPLEX") - GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) - endif() - - endforeach () - - #dsdot,sdsdot - GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") - - # sbdot - if (BUILD_BFLOAT16) - GenerateNamedObjects("${KERNELDIR}/${BSCALKERNEL}" "BGEMM" "scal_k" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBDOTKERNEL}" "SBDOT" "dot_k" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "SINGLE" "f16tos_k" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "DOUBLE" "bf16tod_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${TOBF16KERNEL}" "SINGLE" "stobf16_k" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${TOBF16KERNEL}" "DOUBLE" "dtobf16_k" false "" "" false "BFLOAT16") - endif() - - if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE") - if (DEFINED SMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE") - endif () - if (DEFINED SMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE") - endif () - if (DEFINED ISMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE") - endif () - if (DEFINED ISMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SROTMKERNEL}" "" "rotm_k" false "" "" false "SINGLE") - endif () - if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) - GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") - if (DEFINED DMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") - endif () - if (DEFINED DMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") - endif () - if (DEFINED IDMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") - endif () - if (DEFINED IDMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") - endif () - - # Makefile.L2 - GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) - GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) - foreach (float_type ${FLOAT_TYPES}) - string(SUBSTRING ${float_type} 0 1 float_char) - if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type}) - - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) - - GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) - - else () - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) - endif () - endforeach () - if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) - GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") - endif () - if (BUILD_COMPLEX AND NOT BUILD_SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") - endif () - if (BUILD_BFLOAT16) - GenerateNamedObjects("${KERNELDIR}/${BGEMVNKERNEL}" "BGEMM" "gemv_n" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${BGEMVTKERNEL}" "BGEMM" "gemv_t" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMVNKERNEL}" "" "gemv_n" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMVTKERNEL}" "" "gemv_t" false "" "" false "BFLOAT16") - endif () - # Makefile.L3 - set(USE_TRMM false) - string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE) - if (ARM OR ARM64 OR RISCV64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE) OR (UC_TARGET_CORE MATCHES SAPPHIRERAPIDS)) - set(USE_TRMM true) - endif () - if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10)) - set(USE_TRMM true) - endif () - set(USE_DIRECT_SGEMM false) - if (X86_64 OR ARM64) - set(USE_DIRECT_SGEMM true) - endif() - if (UC_TARGET_CORE MATCHES ARMV9SME OR UC_TARGET_CORE MATCHES VORTEXM4) - set (HAVE_SME true) - endif () - - if (USE_DIRECT_SGEMM) - # if (NOT DEFINED SGEMMDIRECTKERNEL) - if (X86_64) - set (SGEMMDIRECTKERNEL sgemm_direct_skylakex.c) - set (SGEMMDIRECTPERFORMANT sgemm_direct_performant.c) - # endif() - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) - elseif (ARM64) - set (SGEMMDIRECTPERFORMANT sgemm_direct_performant.c) - set (SGEMMDIRECTKERNEL sgemm_direct_arm64_sme1.c) - set (SGEMMDIRECTKERNEL_ALPHA_BETA sgemm_direct_alpha_beta_arm64_sme1.c) - set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1_2VLx2VL.S) - set (SGEMMDIRECTPREKERNEL sgemm_direct_sme1_preprocess.S) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL_ALPHA_BETA}" "" "gemm_direct_alpha_beta" false "" "" false SINGLE) - if (HAVE_SME) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1_2VLx2VL" false "" "" false SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPREKERNEL}" "" "gemm_direct_sme1_preprocess" false "" "" false SINGLE) - endif () - endif () - endif() - - foreach (float_type SINGLE DOUBLE) - string(SUBSTRING ${float_type} 0 1 float_char) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) - endforeach() - if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) - GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") - if (DGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") - endif () - if (DGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") - endif () - if (DGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") - endif () - if (DGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") - GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") - GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") - if (SMALL_MATRIX_OPT) - if (NOT DEFINED DGEMM_SMALL_M_PERMIT) - set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_NN) - set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_NT) - set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_TN) - set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_TT) - set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_NN) - set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_NT) - set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_TN) - set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_TT) - set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE") - endif () - - endif () - if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") - if (SGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") - endif () - if (SGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") - endif () - if (SGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") - endif () - if (SGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE") - endif () - - if (BUILD_BFLOAT16) - if (BGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${BGEMMINCOPY}" "BGEMM" "${BGEMMINCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - if (BGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${BGEMMITCOPY}" "BGEMM" "${BGEMMITCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - if (BGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${BGEMMONCOPY}" "BGEMM" "${BGEMMONCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - if (BGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${BGEMMOTCOPY}" "BGEMM" "${BGEMMOTCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - GenerateNamedObjects("${KERNELDIR}/${BGEMMKERNEL}" "BGEMM" "gemm_kernel" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${BGEMM_BETA}" "BGEMM" "gemm_beta" false "" "" false "BFLOAT16") - if (SBGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${SBGEMMINCOPY}" "" "${SBGEMMINCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - if (SBGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${SBGEMMITCOPY}" "" "${SBGEMMITCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - if (SBGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${SBGEMMONCOPY}" "" "${SBGEMMONCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - if (SBGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${SBGEMMOTCOPY}" "" "${SBGEMMOTCOPYOBJ}" false "" "" true "BFLOAT16") - endif () - GenerateNamedObjects("${KERNELDIR}/${SBGEMMKERNEL}" "" "gemm_kernel" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_BETA}" "" "gemm_beta" false "" "" false "BFLOAT16") - endif () - if (BUILD_HFLOAT16) - if (SHGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${SHGEMMINCOPY}" "" "${SHGEMMINCOPYOBJ}" false "" "" true "HFLOAT16") - endif () - if (SHGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${SHGEMMITCOPY}" "" "${SHGEMMITCOPYOBJ}" false "" "" true "HFLOAT16") - endif () - if (SHGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${SHGEMMONCOPY}" "" "${SHGEMMONCOPYOBJ}" false "" "" true "HFLOAT16") - endif () - if (SHGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${SHGEMMOTCOPY}" "" "${SHGEMMOTCOPYOBJ}" false "" "" true "HFLOAT16") - endif () - GenerateNamedObjects("${KERNELDIR}/${SHGEMMKERNEL}" "" "gemm_kernel" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_BETA}" "" "gemm_beta" false "" "" false "HFLOAT16") - endif () - foreach (float_type ${FLOAT_TYPES}) - string(SUBSTRING ${float_type} 0 1 float_char) - if (${float_char}GEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) - endif () - - if (${float_char}GEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type}) - endif () - - if (${float_char}GEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type}) - endif () - - if (${float_char}GEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type}) - endif () - - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type}) - - if (USE_TRMM) - set(TRMM_KERNEL "${${float_char}TRMMKERNEL}") - else () - set(TRMM_KERNEL "${${float_char}GEMMKERNEL}") - endif () - - if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - - # just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. CN NC NN) that I don't really want to work into GenerateCombinationObjects - - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type}) - - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) - - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) - - - #hemm -if (NOT DEFINED ${float_char}HEMMUTCOPY_M) - set(HEMMUTCOPY_M "generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c") - set(HEMMLTCOPY_M "generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(HEMMUTCOPY_M "${KERNELDIR}/${${float_char}HEMMUTCOPY_M}") - set(HEMMLTCOPY_M "${KERNELDIR}/${${float_char}HEMMLTCOPY_M}") -endif() - GenerateNamedObjects(${HEMMUTCOPY_M} "" "hemm_iutcopy" false "" "" false ${float_type}) - GenerateNamedObjects(${HEMMLTCOPY_M} "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) - - # symm for c and z -if (NOT DEFINED ${float_char}SYMMUCOPY_M) - set(SYMMUCOPY_M "generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c") - set(SYMMLCOPY_M "generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(SYMMUCOPY_M "${KERNELDIR}/${${float_char}SYMMUCOPY_M}") - set(SYMMLCOPY_M "${KERNELDIR}/${${float_char}SYMMLCOPY_M}") -endif() - GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) - GenerateNamedObjects(${SYMMUCOPY_M} "" "symm_iutcopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) - GenerateNamedObjects(${SYMMLCOPY_M} "LOWER" "symm_iltcopy" false "" "" false ${float_type}) - - -if (NOT DEFINED ${float_char}TRMMUNCOPY_M) - set(TRMMUNCOPY_M "generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRMMLNCOPY_M "generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRMMUTCOPY_M "generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRMMLTCOPY_M "generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(TRMMUNCOPY_M "${KERNELDIR}/${${float_char}TRMMUNCOPY_M}") - set(TRMMLNCOPY_M "${KERNELDIR}/${${float_char}TRMMLNCOPY_M}") - set(TRMMUTCOPY_M "${KERNELDIR}/${${float_char}TRMMUTCOPY_M}") - set(TRMMLTCOPY_M "${KERNELDIR}/${${float_char}TRMMLTCOPY_M}") -endif () - GenerateNamedObjects(${TRMMUNCOPY_M} "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMUNCOPY_M} "" "trmm_iunncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRMMUTCOPY_M} "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMUTCOPY_M} "" "trmm_iutncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) - - -if (NOT DEFINED ZTRSMCOPYLN_M) - set(ZTRSMUNCOPY_M "generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c") - set(ZTRSMLNCOPY_M "generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c") - set(ZTRSMUTCOPY_M "generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c") - set(ZTRSMLTCOPY_M "generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(ZTRSMUNCOPY_M "${KERNELDIR}/${ZTRSMCOPYUN_M}") - set(ZTRSMLNCOPY_M "${KERNELDIR}/${ZTRSMCOPYLN_M}") - set(ZTRSMUTCOPY_M "${KERNELDIR}/${ZTRSMCOPYUT_M}") - set(ZTRSMLTCOPY_M "${KERNELDIR}/${ZTRSMCOPYLT_M}") -endif () - GenerateNamedObjects(${ZTRSMUNCOPY_M} "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${ZTRSMUNCOPY_M} "" "trsm_iunncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${ZTRSMLNCOPY_M} "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${ZTRSMLNCOPY_M} "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${ZTRSMUTCOPY_M} "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${ZTRSMUTCOPY_M} "" "trsm_iutncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${ZTRSMLTCOPY_M} "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${ZTRSMLTCOPY_M} "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) - - #gemm3m - if (USE_GEMM3M) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM3MKERNEL}" "NN" "gemm3m_kernel" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA" "gemm3m_oncopyb" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;REAL_ONLY" "gemm3m_oncopyr" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;IMAGE_ONLY" "gemm3m_oncopyi" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA" "gemm3m_otcopyb" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;REAL_ONLY" "gemm3m_otcopyr" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_N}.c" "USE_ALPHA;IMAGE_ONLY" "gemm3m_otcopyi" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY" "gemm3m_incopyb" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;REAL_ONLY" "gemm3m_incopyr" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_ncopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;IMAGE_ONLY" "gemm3m_incopyi" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY" "gemm3m_itcopyb" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;REAL_ONLY" "gemm3m_itcopyr" false "" "" false ${float_type}) - GenerateNamedObjects("generic/zgemm3m_tcopy_${${float_char}GEMM3M_UNROLL_M}.c" "ICOPY;IMAGE_ONLY" "gemm3m_itcopyi" false "" "" false ${float_type}) - -#hemm3m and symm3m - foreach(name symm3m hemm3m) - GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA" "${name}_oucopyb.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;REAL_ONLY" "${name}_oucopyr.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;IMAGE_ONLY" "${name}_oucopyi.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA" "${name}_olcopyb.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;REAL_ONLY" "${name}_olcopyr.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "USE_ALPHA;IMAGE_ONLY" "${name}_olcopyi.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "" "${name}_iucopyb.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "REAL_ONLY" "${name}_iucopyr.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_ucopy_${${float_char}GEMM3M_UNROLL_M}.c" "IMAGE_ONLY" "${name}_iucopyi.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "" "${name}_ilcopyb.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "REAL_ONLY" "${name}_ilcopyr.c" false "" "" false ${float_type}) - GenerateNamedObjects("generic/z${name}_lcopy_${${float_char}GEMM3M_UNROLL_M}.c" "IMAGE_ONLY" "${name}_ilcopyi.c" false "" "" false ${float_type}) - - endforeach () - - endif() - - else () #For real - GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) - - # symm for s and d -if (NOT DEFINED ${float_char}SYMMUCOPY_M) - set(SYMMUCOPY_M "generic/symm_ucopy_${${float_char}GEMM_UNROLL_M}.c") - set(SYMMLCOPY_M "generic/symm_lcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(SYMMUCOPY_M "${KERNELDIR}/${${float_char}SYMMUCOPY_M}") - set(SYMMLCOPY_M "${KERNELDIR}/${${float_char}SYMMLCOPY_M}") -endif() - GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) - GenerateNamedObjects(${SYMMUCOPY_M} "" "symm_iutcopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) - GenerateNamedObjects(${SYMMLCOPY_M} "LOWER" "symm_iltcopy" false "" "" false ${float_type}) - - # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. - # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. - -if (NOT DEFINED ${float_char}TRMMUNCOPY_M) - set(TRMMUNCOPY_M "generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRMMLNCOPY_M "generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRMMUTCOPY_M "generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRMMLTCOPY_M "generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(TRMMUNCOPY_M "${KERNELDIR}/${${float_char}TRMMUNCOPY_M}") - set(TRMMLNCOPY_M "${KERNELDIR}/${${float_char}TRMMLNCOPY_M}") - set(TRMMUTCOPY_M "${KERNELDIR}/${${float_char}TRMMUTCOPY_M}") - set(TRMMLTCOPY_M "${KERNELDIR}/${${float_char}TRMMLTCOPY_M}") -endif () - GenerateNamedObjects(${TRMMUNCOPY_M} "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMUNCOPY_M} "" "trmm_iunncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRMMUTCOPY_M} "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMUTCOPY_M} "" "trmm_iutncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) - - -if (NOT DEFINED TRSMCOPYLN_M) - set(TRSMUNCOPY_M "generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRSMLNCOPY_M "generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRSMUTCOPY_M "generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c") - set(TRSMLTCOPY_M "generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c") -else () - set(TRSMUNCOPY_M "${KERNELDIR}/${TRSMCOPYUN_M}") - set(TRSMLNCOPY_M "${KERNELDIR}/${TRSMCOPYLN_M}") - set(TRSMUTCOPY_M "${KERNELDIR}/${TRSMCOPYUT_M}") - set(TRSMLTCOPY_M "${KERNELDIR}/${TRSMCOPYLT_M}") -endif () - GenerateNamedObjects(${TRSMUNCOPY_M} "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRSMUNCOPY_M} "" "trsm_iunncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRSMLNCOPY_M} "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRSMLNCOPY_M} "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRSMUTCOPY_M} "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRSMUTCOPY_M} "" "trsm_iutncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) - - GenerateNamedObjects(${TRSMLTCOPY_M} "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) - GenerateNamedObjects(${TRSMLTCOPY_M} "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) - - endif () - - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) - - if (NOT DEFINED ${float_char}GEMM_SMALL_M_PERMIT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit.c) - else () - set(${float_char}GEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_NN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn.c) - else () - set(${float_char}GEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_NT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt.c) - else () - set(${float_char}GEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_TN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn.c) - else () - set(${float_char}GEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_TT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt.c) - else () - set(${float_char}GEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_NN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn.c) - else () - set(${float_char}GEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_NT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt.c) - else () - set(${float_char}GEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_TN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn.c) - else () - set(${float_char}GEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - endif () - if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_TT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}GEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt.c) - else () - set(${float_char}GEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - endif () - - if (SMALL_MATRIX_OPT) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false ${float_type}) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false ${float_type}) - - else () - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false ${float_type}) - endif () - if (BUILD_BFLOAT16) - if (NOT DEFINED SBGEMM_SMALL_M_PERMIT) - set(SBGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_NN) - set(SBGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_NT) - set(SBGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_TN) - set(SBGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_TT) - set(SBGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_B0_NN) - set(SBGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_B0_NT) - set(SBGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_B0_TN) - set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) - set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "BFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "BFLOAT16") - endif () - - if (BUILD_HFLOAT16) - if (NOT DEFINED SHGEMM_SMALL_M_PERMIT) - set(SHGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_NN) - set(SHGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_NT) - set(SHGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_TN) - set(SHGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_TT) - set(SHGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_B0_NN) - set(SHGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_B0_NT) - set(SHGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_B0_TN) - set(SHGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED SHGEMM_SMALL_K_B0_TT) - set(SHGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "HFLOAT16") - GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "HFLOAT16") - endif () - endif () - - if (NOT DEFINED ${float_char}OMATCOPY_CN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_CN ../arm/zomatcopy_cn.c) - else () - set(${float_char}OMATCOPY_CN ../arm/omatcopy_cn.c) - endif () - endif () - if (NOT DEFINED ${float_char}OMATCOPY_RN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_RN ../arm/zomatcopy_rn.c) - else () - set(${float_char}OMATCOPY_RN ../arm/omatcopy_rn.c) - endif () - endif () - if (NOT DEFINED ${float_char}OMATCOPY_CT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_CT ../arm/zomatcopy_ct.c) - else () - set(${float_char}OMATCOPY_CT ../arm/omatcopy_ct.c) - endif () - endif () - if (NOT DEFINED ${float_char}OMATCOPY_RT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_RT ../arm/zomatcopy_rt.c) - else () - set(${float_char}OMATCOPY_RT ../arm/omatcopy_rt.c) - endif () - endif () - - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) - - if (NOT DEFINED ${float_char}OMATCOPY_CNC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c) - endif () - endif () - if (NOT DEFINED ${float_char}OMATCOPY_RNC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c) - endif () - endif () - if (NOT DEFINED ${float_char}OMATCOPY_CTC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c) - endif () - endif () - if (NOT DEFINED ${float_char}OMATCOPY_RTC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c) - endif () - endif () - - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) - endif() - - #imatcopy - if (NOT DEFINED ${float_char}IMATCOPY_CN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_CN ../generic/zimatcopy_cn.c) - else () - set(${float_char}IMATCOPY_CN ../generic/imatcopy_cn.c) - endif () - endif () - - if (NOT DEFINED ${float_char}IMATCOPY_RN) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_RN ../generic/zimatcopy_rn.c) - else () - set(${float_char}IMATCOPY_RN ../generic/imatcopy_rn.c) - endif () - endif () - - if (NOT DEFINED ${float_char}IMATCOPY_CT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_CT ../generic/zimatcopy_ct.c) - else () - set(${float_char}IMATCOPY_CT ../generic/imatcopy_ct.c) - endif () - endif () - - if (NOT DEFINED ${float_char}IMATCOPY_RT) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_RT ../generic/zimatcopy_rt.c) - else () - set(${float_char}IMATCOPY_RT ../generic/imatcopy_rt.c) - endif () - endif () - - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CN}" "" "imatcopy_k_cn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RN}" "ROWM" "imatcopy_k_rn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CT}" "" "imatcopy_k_ct" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RT}" "ROWM" "imatcopy_k_rt" false "" "" false ${float_type}) - - - if (NOT DEFINED ${float_char}IMATCOPY_CNC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_CNC ../generic/zimatcopy_cnc.c) - endif () - endif () - if (NOT DEFINED ${float_char}IMATCOPY_RNC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_RNC ../generic/zimatcopy_rnc.c) - endif () - endif () - if (NOT DEFINED ${float_char}IMATCOPY_CTC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_CTC ../generic/zimatcopy_ctc.c) - endif () - endif () - if (NOT DEFINED ${float_char}IMATCOPY_RTC) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - set(${float_char}IMATCOPY_RTC ../generic/zimatcopy_rtc.c) - endif () - endif () - - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CNC}" "CONJ" "imatcopy_k_cnc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RNC}" "CONJ;ROWM" "imatcopy_k_rnc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CTC}" "CONJ" "imatcopy_k_ctc" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RTC}" "CONJ;ROWM" "imatcopy_k_rtc" false "" "" false ${float_type}) - endif() - - #geadd - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) - endforeach () - - if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false "SINGLE") - - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false "SINGLE") - - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false "SINGLE") - - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE") - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE") - if (SMALL_MATRIX_OPT) - if (NOT DEFINED SGEMM_SMALL_M_PERMIT) - set(SGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_NN) - set(SGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_NT) - set(SGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_TN) - set(SGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_TT) - set(SGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_B0_NN) - set(SGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_B0_NT) - set(SGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_B0_TN) - set(SGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED SGEMM_SMALL_K_B0_TT) - set(SGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "SINGLE") - endif () - - endif () - - # Makefile.LA - if(NOT NO_LAPACK) - foreach (float_type ${FLOAT_TYPES}) - string(SUBSTRING ${float_type} 0 1 float_char) - if (${float_type} STREQUAL "BFLOAT16") - set (float_char "SB") - endif () - if (NOT DEFINED ${float_char}NEG_TCOPY) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") - set(${float_char}NEG_TCOPY ../generic/zneg_tcopy_${${float_char}GEMM_UNROLL_M}.c) - else () - set(${float_char}NEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c) - endif () - endif () - - if (NOT DEFINED ${float_char}LASWP_NCOPY) - if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") - set(${float_char}LASWP_NCOPY ../generic/zlaswp_ncopy_${${float_char}GEMM_UNROLL_N}.c) - else () - set(${float_char}LASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c) - endif () - endif () - GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}" "" "neg_tcopy" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}" "" "laswp_ncopy" false "" "" false ${float_type}) - endforeach() - if (BUILD_COMPLEX AND NOT BUILD_SINGLE) - if (NOT DEFINED SNEG_TCOPY) - set(SNEG_TCOPY ../generic/neg_tcopy_${SGEMM_UNROLL_M}.c) - endif () - - if (NOT DEFINED SLASWP_NCOPY) - set(SLASWP_NCOPY ../generic/laswp_ncopy_${SGEMM_UNROLL_N}.c) - endif () - GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE") - endif() - if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) - if (NOT DEFINED DNEG_TCOPY) - set(DNEG_TCOPY ../generic/neg_tcopy_${DGEMM_UNROLL_M}.c) - endif () - - if (NOT DEFINED DLASWP_NCOPY) - set(DLASWP_NCOPY ../generic/laswp_ncopy_${DGEMM_UNROLL_N}.c) - endif () - GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}" "" "neg_tcopy" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "DOUBLE") - endif() - endif() - - if (${DYNAMIC_ARCH}) - set(SETPARAM_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}) - file(READ ${CMAKE_CURRENT_SOURCE_DIR}/setparam-ref.c SETPARAM_REF_CONTENTS) - string(REPLACE "TS" "${TSUFFIX}" SETPARAM_REF_CONTENTS_NEW "${SETPARAM_REF_CONTENTS}") - file(WRITE ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.tmp "${SETPARAM_REF_CONTENTS_NEW}") - configure_file(${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.tmp ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.c COPYONLY) - set(OPENBLAS_SRC ${OPENBLAS_SRC} ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.c) - file(REMOVE ${SETPARAM_TARGET_DIR}/setparam${TSUFFIX}.tmp) - - set(KERNEL_TSUFFIX_CONTENTS "") - foreach (KERNEL_INTERFACE_H ${KERNEL_INTERFACE}) - file(READ ${PROJECT_SOURCE_DIR}/${KERNEL_INTERFACE_H} KERNEL_INTERFACE_H_CONTENTS) - string(REGEX REPLACE " *\\(" "${TSUFFIX}(" KERNEL_INTERFACE_H_CONTENTS_NEW "${KERNEL_INTERFACE_H_CONTENTS}") - set(KERNEL_TSUFFIX_CONTENTS "${KERNEL_TSUFFIX_CONTENTS}\n${KERNEL_INTERFACE_H_CONTENTS_NEW}") - endforeach() - file(WRITE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp "${KERNEL_TSUFFIX_CONTENTS}") - configure_file(${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.h COPYONLY) - file(REMOVE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp) - - foreach (float_type ${FLOAT_TYPES}) - # a bit of metaprogramming here to pull out the appropriate KERNEL var - string(SUBSTRING ${float_type} 0 1 float_char) - if (${float_type} STREQUAL "BFLOAT16") - set (float_char "SB") - endif () - GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type}) - GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) - endforeach () - - if (BUILD_COMPLEX AND NOT BUILD_SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") - GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") - endif () - if (BUILD_DOUBLE AND NOT BUILD_SINGLE) - GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "SINGLE") - - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "SINGLE") - - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "SINGLE") - - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "SINGLE") - GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "SINGLE") - - if (SGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") - endif () - if (SGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") - endif () - if (SGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") - endif () - if (SGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") - GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") - endif () - - if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) - GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") - if (DEFINED DMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") - endif () - if (DEFINED DMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") - endif () - if (DEFINED IDMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") - endif () - if (DEFINED IDMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") - - GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") - if (DGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") - endif () - if (DGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") - endif () - if (DGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") - endif () - if (DGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") - endif () - GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") - - GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") - GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") - if (SMALL_MATRIX_OPT) - if (NOT DEFINED DGEMM_SMALL_M_PERMIT) - set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_NN) - set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_NT) - set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_TN) - set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_TT) - set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_NN) - set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_NT) - set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_TN) - set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) - endif () - if (NOT DEFINED DGEMM_SMALL_K_B0_TT) - set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) - endif () - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE") - GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE") - endif () - endif () - if (BUILD_COMPLEX16 AND NOT BUILD_SINGLE) - GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") - endif() - if (BUILD_COMPLEX160 AND NOT BUILD_COMPLEX) - GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX") - if (DEFINED CMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX") - endif () - if (DEFINED CMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX") - endif () - GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX") - if (DEFINED ICMAXKERNEL) - GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX") - endif () - if (DEFINED ICMINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX") - endif () - GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX") - if (CGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX") - endif () - - if (CGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX") - endif () - - if (CGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX") - endif () - - if (CGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX") - endif () - GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX") - if (SMALL_MATRIX_OPT) - if (NOT DEFINED CGEMM_SMALL_M_PERMIT) - set(CGEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit) - endif () - if (NOT DEFINED CGEMM_SMALL_K_NN) - set(CGEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn) - endif () - if (NOT DEFINED CGEMM_SMALL_K_NT) - set(CGEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt) - endif () - if (NOT DEFINED CGEMM_SMALL_K_TN) - set(CGEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn) - endif () - if (NOT DEFINED CGEMM_SMALL_K_TT) - set(CGEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt) - endif () - if (NOT DEFINED CGEMM_SMALL_K_B0_NN) - set(CGEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn) - endif () - if (NOT DEFINED CGEMM_SMALL_K_B0_NT) - set(CGEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt) - endif () - if (NOT DEFINED CGEMM_SMALL_K_B0_TN) - set(CGEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn) - endif () - if (NOT DEFINED CGEMM_SMALL_K_B0_TT) - set(CGEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt) - endif () - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_M_PERMIT}.c" "" "gemm_small_matrix_permit" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NN" "gemm_small_kernel_nn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NR" "gemm_small_kernel_nr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RN" "gemm_small_kernel_rn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RR" "gemm_small_kernel_rr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NT" "gemm_small_kernel_nt" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NC" "gemm_small_kernel_nc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RT" "gemm_small_kernel_rt" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RC" "gemm_small_kernel_rc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TN" "gemm_small_kernel_tn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TR" "gemm_small_kernel_tr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CN" "gemm_small_kernel_cn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CR" "gemm_small_kernel_cr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TT" "gemm_small_kernel_tt" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TC" "gemm_small_kernel_tc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CT" "gemm_small_kernel_ct" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CC" "gemm_small_kernel_cc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "COMPLEX") - GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "COMPLEX") - endif () - GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX") - - GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX") - - GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX") - - GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX") - GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX") - endif () - endif () - - add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) - set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") - get_target_property(KERNEL_INCLUDE_DIRECTORIES kernel${TSUFFIX} INCLUDE_DIRECTORIES) - set_target_properties(kernel${TSUFFIX} PROPERTIES INCLUDE_DIRECTORIES "${KERNEL_INCLUDE_DIRECTORIES};${TARGET_CONF_DIR}") - if (USE_GEMM3M) - target_compile_definitions(kernel${TSUFFIX} PRIVATE USE_GEMM3M) - endif() - if (USE_OPENMP) - target_link_libraries(kernel${TSUFFIX} OpenMP::OpenMP_C) - endif() -endfunction () - - -set(ADD_COMMONOBJS 1) -if (${DYNAMIC_ARCH}) - foreach(TARGET_CORE ${DYNAMIC_CORE}) - set(BUILD_KERNEL 1) - set(KDIR "") - set(TSUFFIX "_${TARGET_CORE}") - set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") - build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") - set(ADD_COMMONOBJS 0) - endforeach() -else () - set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}) - set(TARGET_CORE ${CORE}) - set(KDIR "") - set(TSUFFIX "") - set(KERNEL_DEFINITIONS "") - build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") -endif () - From 107c883c8a434ad5f24606876c5f5885ad8db113 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 19 Aug 2025 05:13:28 -0700 Subject: [PATCH 27/35] Update SME-related kernels --- kernel/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index a2e349d32d..b4ea62f0d5 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -241,7 +241,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) if (X86_64 OR ARM64) set(USE_DIRECT_SGEMM true) endif() - if (UC_TARGET_CORE MATCHES ARMV9SME) + if (UC_TARGET_CORE MATCHES ARMV9SME OR UC_TARGET_CORE MATCHES VORTEXM4) set (HAVE_SME true) endif () @@ -254,14 +254,16 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) elseif (ARM64) + set (SGEMMDIRECTPERFORMANT sgemm_direct_performant.c) set (SGEMMDIRECTKERNEL sgemm_direct_arm64_sme1.c) set (SGEMMDIRECTKERNEL_ALPHA_BETA sgemm_direct_alpha_beta_arm64_sme1.c) - set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1.S) + set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1_2VLx2VL.S) set (SGEMMDIRECTPREKERNEL sgemm_direct_sme1_preprocess.S) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL_ALPHA_BETA}" "" "gemm_direct_alpha_beta" false "" "" false SINGLE) if (HAVE_SME) - GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1" false "" "" false SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1_2VLx2VL" false "" "" false SINGLE) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPREKERNEL}" "" "gemm_direct_sme1_preprocess" false "" "" false SINGLE) endif () endif () From 501728a354fc70d47ef4f69b5f181b8a38b325c9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Aug 2025 06:24:38 -0700 Subject: [PATCH 28/35] adjust register 20 accesses to 21 after moving x18 --- kernel/arm64/sgemm_direct_sme1_2VLx2VL.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S b/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S index ebbd0cadd9..afb662c1fb 100644 --- a/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S +++ b/kernel/arm64/sgemm_direct_sme1_2VLx2VL.S @@ -62,7 +62,7 @@ add C2, N, C4 //N + SVLs add C3, C5, C4 //K*SVLs + SVLs whilelt p2.s, M_cntr, M //Tile 0,1 predicate (M dimension) - sub w20, w20, #2 //SVLs-2 + sub w21, w21, #2 //SVLs-2 .M_Loop: incw M_cntr @@ -199,7 +199,7 @@ process_K_less_than_equal_2: st1w {za1h.s[w13, #0]}, p5, [Cptr1] st1w {za2h.s[w13, #0]}, p6, [Cptr0, C6, lsl #2] st1w {za3h.s[w13, #0]}, p7, [Cptr1, C6, lsl #2] - cmp w13, w20 + cmp w13, w21 b.mi .Loop_store_ZA psel p4, p0, p2.s[w13, 1] psel p5, p1, p2.s[w13, 1] From edaa73fd2423ca332c41fdf16966da8ddb1c5ca3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Aug 2025 06:33:28 -0700 Subject: [PATCH 29/35] Hide the local 2VLx2VL symbol as static is insufficient for this with gcc --- kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c b/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c index 7a46e40f3a..9fb7e0d43f 100644 --- a/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c @@ -111,6 +111,7 @@ return; } __arm_new("za") __arm_locally_streaming +__attribute__((visibility("hidden"))) static void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, const float* alpha,\ const float *ba, const float *restrict bb, const float* beta,\ float *restrict C) { @@ -209,3 +210,4 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict float beta, float * __restrict R, BLASLONG strideR){fprintf(stderr,"empty sgemm_direct_alpha_beta should not be called!!!\n");} #endif + From 1ee8879c787c19b6a6c092def2016e76e93ffefd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Aug 2025 09:59:32 -0700 Subject: [PATCH 30/35] Add VORTEXM4 --- getarch.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/getarch.c b/getarch.c index 417a3d08ca..72097ada97 100644 --- a/getarch.c +++ b/getarch.c @@ -1654,6 +1654,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "VORTEX" #endif +#ifdef FORCE_VORTEXM4 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "VORTEXM4" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DVORTEXM4 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SME -DARMV8" +#define LIBNAME "vortexm4" +#define CORENAME "VORTEXM4" +#endif + #ifdef FORCE_A64FX #define ARMV8 #define FORCE From 7f89c6f353091aa09540b51158ab85a220b1192b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 23 Aug 2025 14:20:15 -0700 Subject: [PATCH 31/35] smh-based direct sgemm currently requires leading dimensions to be same as matrix dimension --- interface/gemm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/interface/gemm.c b/interface/gemm.c index 62bc442467..92c75093c3 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -266,6 +266,7 @@ void NAME(char *TRANSA, char *TRANSB, int transa, transb, nrowa, nrowb; blasint info; + int order = -1; char transA, transB; IFLOAT *buffer; @@ -557,15 +558,16 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS if (strcmp(gotoblas_corename(), "armv9sme") == 0 || strcmp(gotoblas_corename(), "vortexm4") == 0) // if (support_sme1()) #endif - if (order == CblasRowMajor && beta == 0 && alpha == 1.0 && TransA == CblasNoTrans && TransB == CblasNoTrans&& SGEMM_DIRECT_PERFORMANT(m,n,k)) { + if (order == CblasRowMajor && m==lda && n ==ldb && k==ldc && beta == 0 && alpha == 1.0 && TransA == CblasNoTrans && TransB == CblasNoTrans&& SGEMM_DIRECT_PERFORMANT(m,n,k)) { SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc); return; } else - if (order == CblasRowMajor && beta != 0. && (!(alpha==1.&&beta==1.)) && TransA == CblasNoTrans && TransB == CblasNoTrans&& SGEMM_DIRECT_PERFORMANT(m,n,k)) { + if (order == CblasRowMajor && m==lda && n==ldb && k==ldc && TransA == CblasNoTrans && TransB == CblasNoTrans&& SGEMM_DIRECT_PERFORMANT(m,n,k)) { SGEMM_DIRECT_ALPHA_BETA(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); return; } + #endif #endif @@ -587,9 +589,6 @@ else if ((args.m == 0) || (args.n == 0)) return; - - - #if 0 fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n", args.m, args.n, args.k, args.lda, args.ldb, args.ldc); @@ -626,6 +625,7 @@ else } bool is_efficient_gemv = have_tuned_gemv || ((NT == 'N') || (NT == 'T' && inc_x == 1)); if (is_efficient_gemv) { +fprintf(stderr,"gemv_forwarding\n"); GEMV(&NT, &m, &n, args.alpha, args.a, &lda, args.b, &inc_x, args.beta, args.c, &inc_y); return; } @@ -649,6 +649,7 @@ else } bool is_efficient_gemv = have_tuned_gemv || ((NT == 'N' && inc_y == 1) || (NT == 'T' && inc_x == 1)); if (is_efficient_gemv) { +fprintf(stderr,"gemv_forwarding\n"); GEMV(&NT, &m, &n, args.alpha, args.b, &ldb, args.a, &inc_x, args.beta, args.c, &inc_y); return; } From 8e50b8d5255a26bc8293d1b83586a2035a8d8ccb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 23 Aug 2025 14:36:49 -0700 Subject: [PATCH 32/35] Add d8 to d15 to clobber lists as the code does not expressly save them --- kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c b/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c index 9fb7e0d43f..f2de509c77 100644 --- a/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c @@ -111,7 +111,6 @@ return; } __arm_new("za") __arm_locally_streaming -__attribute__((visibility("hidden"))) static void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, const float* alpha,\ const float *ba, const float *restrict bb, const float* beta,\ float *restrict C) { @@ -177,11 +176,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict * of reading directly from vector (z) registers. * */ asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", - "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", - "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31"); + "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31","za"); /* Pre-process the left matrix to make it suitable for matrix sum of outer-product calculation @@ -190,11 +189,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict SME1_PREPROCESS(M, K, A, A_mod); asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", - "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15","d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", - "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31"); + "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", "za"); /* Calculate C = alpha*A*B + beta*C */ @@ -210,4 +209,3 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict float beta, float * __restrict R, BLASLONG strideR){fprintf(stderr,"empty sgemm_direct_alpha_beta should not be called!!!\n");} #endif - From b4fc09e9e17cd2b3809e446d9b2b18fc028d6d14 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 23 Aug 2025 14:39:27 -0700 Subject: [PATCH 33/35] Add registers d8 to d15 to clobber lists as the code does not expressly save them --- kernel/arm64/sgemm_direct_arm64_sme1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/arm64/sgemm_direct_arm64_sme1.c b/kernel/arm64/sgemm_direct_arm64_sme1.c index a214a0ff66..e4c8423550 100644 --- a/kernel/arm64/sgemm_direct_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_arm64_sme1.c @@ -58,7 +58,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ * of reading directly from vector (z) registers. * */ asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", - "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", @@ -74,7 +74,7 @@ fprintf(stderr,"sme direct calling 2x2\n"); SME1_DIRECT2X2(M, K, N, A_mod, B, R); asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", - "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", From 1b88c9c7421d10c622f28ef3b3f85207089baccc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Aug 2025 13:48:22 -0700 Subject: [PATCH 34/35] remove debugging printouts --- interface/gemm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/interface/gemm.c b/interface/gemm.c index 92c75093c3..52c16fc0ab 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -625,7 +625,6 @@ else } bool is_efficient_gemv = have_tuned_gemv || ((NT == 'N') || (NT == 'T' && inc_x == 1)); if (is_efficient_gemv) { -fprintf(stderr,"gemv_forwarding\n"); GEMV(&NT, &m, &n, args.alpha, args.a, &lda, args.b, &inc_x, args.beta, args.c, &inc_y); return; } @@ -649,7 +648,6 @@ fprintf(stderr,"gemv_forwarding\n"); } bool is_efficient_gemv = have_tuned_gemv || ((NT == 'N' && inc_y == 1) || (NT == 'T' && inc_x == 1)); if (is_efficient_gemv) { -fprintf(stderr,"gemv_forwarding\n"); GEMV(&NT, &m, &n, args.alpha, args.b, &ldb, args.a, &inc_x, args.beta, args.c, &inc_y); return; } From 2b5d8c789dad6560b17fe95759cbb5474e83686a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Aug 2025 13:50:08 -0700 Subject: [PATCH 35/35] remove debugging printout --- kernel/arm64/sgemm_direct_arm64_sme1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/arm64/sgemm_direct_arm64_sme1.c b/kernel/arm64/sgemm_direct_arm64_sme1.c index e4c8423550..8c1b398186 100644 --- a/kernel/arm64/sgemm_direct_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_arm64_sme1.c @@ -70,7 +70,6 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ SME1_PREPROCESS(M, K, A, A_mod); /* Calculate C = A*B */ -fprintf(stderr,"sme direct calling 2x2\n"); SME1_DIRECT2X2(M, K, N, A_mod, B, R); asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", @@ -90,3 +89,4 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ fprintf(stderr,"EMPTY sgemm_kernel_direct should never be called \n"); } #endif +