From 0e7f43c898ea646b8bfec982657e77ac09e9f118 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Fri, 14 Feb 2020 10:35:51 +0100 Subject: [PATCH 1/3] Add missing USE_MIN in kernel/CMakeLists.txt. --- kernel/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index b3310e87e4..35e0fff25f 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -47,7 +47,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) endif () if (DEFINED ${float_char}MINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "USE_MIN" "min_k" false "" "" false ${float_type}) endif () GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) @@ -55,7 +55,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) endif () if (DEFINED I${float_char}MINKERNEL) - GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "USE_MIN" "i*min_k" false "" "" false ${float_type}) endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) From 18bcc36a6996d117c156394b047ec1c0e298e202 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 13 Feb 2020 14:32:24 +0100 Subject: [PATCH 2/3] Fix implementation of iamax_sse.S as reported in #2116. The was a typo in iamax_sse.S where one of the comparison was cmpeqps instead of cmpeqss. That misdetected index for sequences where the minimum value was 0. --- kernel/x86_64/KERNEL | 4 +- kernel/x86_64/iamax_sse.S | 6 +-- utest/CMakeLists.txt | 1 + utest/Makefile | 2 +- utest/test_ismin.c | 89 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 8 deletions(-) create mode 100644 utest/test_ismin.c diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 92d121ab2d..4874711bbe 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -171,7 +171,7 @@ IXAMAXKERNEL = izamax.S endif ifndef ISAMINKERNEL -ISAMINKERNEL = iamax.S +ISAMINKERNEL = iamax_sse.S endif ifndef IDAMINKERNEL @@ -207,7 +207,7 @@ IQMAXKERNEL = iamax.S endif ifndef ISMINKERNEL -ISMINKERNEL = iamax.S +ISMINKERNEL = iamax_sse.S endif ifndef IDMINKERNEL diff --git a/kernel/x86_64/iamax_sse.S b/kernel/x86_64/iamax_sse.S index d50c1699cb..9c7af1fd7b 100644 --- a/kernel/x86_64/iamax_sse.S +++ b/kernel/x86_64/iamax_sse.S @@ -36,10 +36,6 @@ /* or implied, of The University of Texas at Austin. */ /*********************************************************************/ -/* This kernel was found to give wrong results when used for ISMIN/ISAMIN - with increment != 1, although it appears to be correct for corresponding - MAX operations. See issue 2116 */ - #define ASSEMBLER #include "common.h" @@ -863,7 +859,7 @@ #ifdef USE_ABS andps %xmm15, %xmm5 #endif - cmpeqps %xmm0, %xmm5 + cmpeqss %xmm0, %xmm5 movss 0 * SIZE(X), %xmm6 addq INCX, X diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt index 1e3051a8fd..544646911e 100644 --- a/utest/CMakeLists.txt +++ b/utest/CMakeLists.txt @@ -7,6 +7,7 @@ else () set(OpenBLAS_utest_src utest_main.c test_amax.c + test_ismin.c test_rotmg.c test_rot.c test_axpy.c diff --git a/utest/Makefile b/utest/Makefile index bd4bdf3ae8..32bdcc6e14 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -11,7 +11,7 @@ UTESTBIN=openblas_utest include $(TOPDIR)/Makefile.system -OBJS=utest_main.o test_amax.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o +OBJS=utest_main.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o ifneq ($(NO_LAPACK), 1) diff --git a/utest/test_ismin.c b/utest/test_ismin.c new file mode 100644 index 0000000000..f23d6b5457 --- /dev/null +++ b/utest/test_ismin.c @@ -0,0 +1,89 @@ +/***************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +#include "openblas_utest.h" + +#define ELEMENTS 50 +#define INCREMENT 2 + +CTEST(ismin, positive_step_2){ + blasint i; + blasint N = ELEMENTS, inc = INCREMENT; + float x[ELEMENTS * INCREMENT]; + for (i = 0; i < N * inc; i ++) { + x[i] = i + 1000; + } + + x[8 * inc] = 0; + blasint index = BLASFUNC(ismin)(&N, x, &inc); + ASSERT_EQUAL(9, index); +} + +CTEST(ismin, negative_step_2){ + blasint i; + blasint N = ELEMENTS, inc = INCREMENT; + float x[ELEMENTS * INCREMENT]; + for (i = 0; i < N * inc; i ++) { + x[i] = - i - 1000; + } + + x[8 * inc] = -123456.0f; + blasint index = BLASFUNC(ismin)(&N, x, &inc); + ASSERT_EQUAL(9, index); +} + +CTEST(ismax, positive_step_2){ + blasint i; + blasint N = ELEMENTS, inc = INCREMENT; + float x[ELEMENTS * INCREMENT]; + for (i = 0; i < N * inc; i ++) { + x[i] = i + 1000; + } + + x[8 * inc] = 123456.0f; + blasint index = BLASFUNC(ismax)(&N, x, &inc); + ASSERT_EQUAL(9, index); +} + +CTEST(ismax, negative_step_2){ + blasint i; + blasint N = ELEMENTS, inc = INCREMENT; + float x[ELEMENTS * INCREMENT]; + for (i = 0; i < N * inc; i ++) { + x[i] = - i - 1000; + } + + x[8 * inc] = 0; + blasint index = BLASFUNC(ismax)(&N, x, &inc); + ASSERT_EQUAL(9, index); +} From aeea14ee4096860a75818547931c9dae43f965da Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 13 Feb 2020 14:42:45 +0100 Subject: [PATCH 3/3] Come up with LOAD_AND_COMPARE_TO_MXX macro in iamax_sse.S. --- kernel/x86_64/iamax_sse.S | 72 +++++++++------------------------------ 1 file changed, 17 insertions(+), 55 deletions(-) diff --git a/kernel/x86_64/iamax_sse.S b/kernel/x86_64/iamax_sse.S index 9c7af1fd7b..4f62b9be29 100644 --- a/kernel/x86_64/iamax_sse.S +++ b/kernel/x86_64/iamax_sse.S @@ -55,6 +55,15 @@ #define MAXSS minss #endif +.macro LOAD_AND_COMPARE_TO_MXX REG + movss 0 * SIZE(X), \REG + addq INCX, X +#ifdef USE_ABS + andps %xmm15, \REG +#endif + cmpeqss %xmm0, \REG +.endm + #include "l1param.h" PROLOGUE @@ -826,61 +835,14 @@ ALIGN_4 .L93: - movss 0 * SIZE(X), %xmm1 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm1 -#endif - cmpeqss %xmm0, %xmm1 - - movss 0 * SIZE(X), %xmm2 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm2 -#endif - cmpeqss %xmm0, %xmm2 - - movss 0 * SIZE(X), %xmm3 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm3 -#endif - cmpeqss %xmm0, %xmm3 - - movss 0 * SIZE(X), %xmm4 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm4 -#endif - cmpeqss %xmm0, %xmm4 - - movss 0 * SIZE(X), %xmm5 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm5 -#endif - cmpeqss %xmm0, %xmm5 - - movss 0 * SIZE(X), %xmm6 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm6 -#endif - cmpeqss %xmm0, %xmm6 - - movss 0 * SIZE(X), %xmm7 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm7 -#endif - cmpeqss %xmm0, %xmm7 - - movss 0 * SIZE(X), %xmm8 - addq INCX, X -#ifdef USE_ABS - andps %xmm15, %xmm8 -#endif - cmpeqss %xmm0, %xmm8 + LOAD_AND_COMPARE_TO_MXX %xmm1 + LOAD_AND_COMPARE_TO_MXX %xmm2 + LOAD_AND_COMPARE_TO_MXX %xmm3 + LOAD_AND_COMPARE_TO_MXX %xmm4 + LOAD_AND_COMPARE_TO_MXX %xmm5 + LOAD_AND_COMPARE_TO_MXX %xmm6 + LOAD_AND_COMPARE_TO_MXX %xmm7 + LOAD_AND_COMPARE_TO_MXX %xmm8 orps %xmm2, %xmm1 orps %xmm4, %xmm3