From a4bd41e9f2bbebfe2453de7a43194b185fd72da5 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Tue, 4 Sep 2018 10:51:19 +0200
Subject: [PATCH 1/3] Fix paths to C kernels for nrm2

---
 kernel/arm64/KERNEL | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/arm64/KERNEL b/kernel/arm64/KERNEL
index aeccfbf4c8..f936cdf47e 100644
--- a/kernel/arm64/KERNEL
+++ b/kernel/arm64/KERNEL
@@ -1,17 +1,17 @@
 ifndef SNRM2KERNEL
-SNRM2KERNEL = nrm2.c
+SNRM2KERNEL = ../arm/nrm2.c
 endif
 
 ifndef DNRM2KERNEL
-DNRM2KERNEL = nrm2.c
+DNRM2KERNEL = ../arm/nrm2.c
 endif
 
 ifndef CNRM2KERNEL
-CNRM2KERNEL = znrm2.c
+CNRM2KERNEL = ../arm/znrm2.c
 endif
 
 ifndef ZNRM2KERNEL
-ZNRM2KERNEL = znrm2.c
+ZNRM2KERNEL = ../arm/znrm2.c
 endif
 
 ifndef SCABS_KERNEL

From 1cb7b9015ebd49e1cbf09eb289b7a6d5bba5ea31 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Tue, 4 Sep 2018 11:06:51 +0200
Subject: [PATCH 2/3] Conditional compilation of assembly files that IOS does not like

---
NOTE(review): in the OS_DARWIN+CROSS branch SGEMMKERNEL becomes the generic
2x2 C kernel, while the SGEMMONCOPY/SGEMMOTCOPY context lines below still
name gemm_ncopy_4.c/gemm_tcopy_4.c. Confirm that the copy routines match
the 2x2 unroll factors that patch 3/3 selects in param.h.

 kernel/arm64/KERNEL.ARMV8 | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/arm64/KERNEL.ARMV8 b/kernel/arm64/KERNEL.ARMV8
index d057546280..4c6d6fb710 100644
--- a/kernel/arm64/KERNEL.ARMV8
+++ b/kernel/arm64/KERNEL.ARMV8
@@ -51,10 +51,12 @@ CDOTKERNEL = zdot.S
 ZDOTKERNEL = zdot.S
 DSDOTKERNEL = dot.S
 
+ifneq ($(OS_DARWIN)$(CROSS),11)
 SNRM2KERNEL = nrm2.S
 DNRM2KERNEL = nrm2.S
 CNRM2KERNEL = znrm2.S
 ZNRM2KERNEL = znrm2.S
+endif
 
 SROTKERNEL = rot.S
 DROTKERNEL = rot.S
@@ -86,7 +88,11 @@ DTRMMKERNEL = ../generic/trmmkernel_2x2.c
 CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
 ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
 
+ifneq ($(OS_DARWIN)$(CROSS),11)
 SGEMMKERNEL = sgemm_kernel_4x4.S
+else
+SGEMMKERNEL = ../generic/gemmkernel_2x2.c
+endif
 SGEMMONCOPY = ../generic/gemm_ncopy_4.c
 SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
 SGEMMONCOPYOBJ = sgemm_oncopy.o

From 4cf7315a5d5c512b1f38c523d4cd28c399b2000d Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 6 Sep 2018 21:41:54 +0200
Subject: [PATCH 3/3] Adjust ARMV8 SGEMM unrolling when using the C fallback kernel_2x2 for IOS

---
 param.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/param.h b/param.h
index cfa4bba5ca..ded9fe0b84 100644
--- a/param.h
+++ b/param.h
@@ -2590,8 +2590,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GEMM_DEFAULT_OFFSET_B 0
 #define GEMM_DEFAULT_ALIGN 0x03fffUL
 
+#if defined(OS_DARWIN) && defined(CROSS)
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+#else
 #define SGEMM_DEFAULT_UNROLL_M 4
 #define SGEMM_DEFAULT_UNROLL_N 4
+#endif
 
 #define DGEMM_DEFAULT_UNROLL_M 2
 #define DGEMM_DEFAULT_UNROLL_N 2