Skip to content

Commit 5ef9a2f

Browse files
committed
implement v?Sqr as y = _mm256_mul_pd(a, a)
1 parent e31e919 commit 5ef9a2f

19 files changed

+300
-19
lines changed

include/openvml.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ OPENVML_EXPORT void OpenVML_FUNCNAME(vdSub)(const VML_INT n, const double * a, c
4646
OPENVML_EXPORT void OpenVML_FUNCNAME(vcSub)(const VML_INT n, const float * a, const float * b, float * y);
4747
OPENVML_EXPORT void OpenVML_FUNCNAME(vzSub)(const VML_INT n, const double * a, const double * b, double * y);
4848

49-
OPENVML_EXPORT void OpenVML_FUNCNAME(vsSqr)(const VML_INT n, const double * a, double * y);
49+
OPENVML_EXPORT void OpenVML_FUNCNAME(vsSqr)(const VML_INT n, const float * a, float * y);
50+
OPENVML_EXPORT void OpenVML_FUNCNAME(vdSqr)(const VML_INT n, const double * a, double * y);
5051

5152
OPENVML_EXPORT void OpenVML_FUNCNAME(vsPow)(const VML_INT n, const float * a, const float * b, float * y);
5253
OPENVML_EXPORT void OpenVML_FUNCNAME(vdPow)(const VML_INT n, const double * a, const double * b, double * y);

include/openvml_kernel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ void OpenVML_FUNCNAME(csub_k)(VMLLONG n, float * a, float * b, float * y, float
4242
void OpenVML_FUNCNAME(zsub_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
4343

4444
void OpenVML_FUNCNAME(ssqr_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
45+
/* Double-precision square kernel. The symbol is dsqr_k so that it matches the name DSQR_K expands to. */
void OpenVML_FUNCNAME(dsqr_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
4546

4647
void OpenVML_FUNCNAME(spow_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
4748
void OpenVML_FUNCNAME(dpow_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);

include/openvml_macros.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
#define CSUB_K OpenVML_FUNCNAME(csub_k)
4444
#define ZSUB_K OpenVML_FUNCNAME(zsub_k)
4545

46-
#define ZSQR_K OpenVML_FUNCNAME(ssqr_k)
46+
#define SSQR_K OpenVML_FUNCNAME(ssqr_k)
47+
#define DSQR_K OpenVML_FUNCNAME(dsqr_k)
4748

4849
#define SPOW_K OpenVML_FUNCNAME(spow_k)
4950
#define DPOW_K OpenVML_FUNCNAME(dpow_k)
@@ -116,6 +117,7 @@
116117
#ifndef DOUBLE
117118
#define ADD_K SADD_K
118119
#define SUB_K SSUB_K
120+
#define SQR_K SSQR_K
119121
#define POW_K SPOW_K
120122
#define POWX_K SPOWX_K
121123
#define EXP_K SEXP_K
@@ -136,6 +138,7 @@
136138
#else
137139
#define ADD_K DADD_K
138140
#define SUB_K DSUB_K
141+
#define SQR_K DSQR_K
139142
#define POW_K DPOW_K
140143
#define POWX_K DPOWX_K
141144
#define EXP_K DEXP_K

include/openvml_reference.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vcSub)(const VML_INT n, const float * a
4747
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vzSub)(const VML_INT n, const double * a, const double * b, double * y);
4848

4949
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsSqr)(const VML_INT n, const float * a, float * y);
50+
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdSqr)(const VML_INT n, const double * a, double * y);
5051

5152
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsPow)(const VML_INT n, const float * a, const float * b, float * y);
5253
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdPow)(const VML_INT n, const double * a, const double * b, double * y);

interface/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ set(OpenVML_LIBSRC_C "")
66
set(OpenVML_LIBSRC_Z "")
77

88
set(REAL_INTERFACE_LIST
9-
add sub
9+
add sub sqr
1010
pow powx pow2o3 pow3o2 exp expm1
1111
tanh
1212
log10 ln log1p
@@ -108,4 +108,4 @@ Endforeach(INTERFACE)
108108

109109
add_library(openvml_interface_core OBJECT ${OpenVML_LIBSRC_S} ${OpenVML_LIBSRC_D} ${OpenVML_LIBSRC_C} ${OpenVML_LIBSRC_Z} ${OpenVML_LIBSRC_OTHER})
110110

111-
target_compile_definitions(openvml_interface_core PUBLIC openvml_EXPORTS)
111+
target_compile_definitions(openvml_interface_core PUBLIC openvml_EXPORTS)

interface/sqr.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* * Copyright (c) 2014, 2015 Zhang Xianyi
2+
* All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without modification,
5+
* are permitted provided that the following conditions are met:
6+
*
7+
* * Redistributions of source code must retain the above copyright notice, this
8+
* list of conditions and the following disclaimer.
9+
*
10+
* * Redistributions in binary form must reproduce the above copyright notice, this
11+
* list of conditions and the following disclaimer in the documentation and/or
12+
* other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15+
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18+
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21+
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
*/
25+
26+
#include <openvml.h>
27+
#include <openvml_driver.h>
28+
#include <openvml_kernel.h>
29+
30+
31+
/*
 * Public v?Sqr entry point.
 *
 * n : number of elements requested
 * a : input array
 * y : output array
 *
 * Returns immediately, without touching y, when n is not positive or when
 * either pointer is NULL. Otherwise the work is handed to SQR_K through
 * EXEC_VML.
 */
void CNAME(const VML_INT n, const VML_FLOAT * a, VML_FLOAT * y) {

  /* Nothing to do for an empty request or a missing buffer. */
  if (n<=0 || a==NULL || y==NULL) return;

  /* Only the a and y slots are used; the remaining pointer slots are NULL. */
  EXEC_VML(0, SQR_K, n, (VML_FLOAT*)a, NULL, y, NULL, NULL);

}

kernel/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ set(OpenVML_LIBSRC_C "")
77
set(OpenVML_LIBSRC_Z "")
88

99
#s,d
10-
set(KERNEL_LIST add sub pow powx exp expm1 tanh log10 ln log1p floor
10+
set(KERNEL_LIST add sub sqr pow powx exp expm1 tanh log10 ln log1p floor
1111
sin cos sincos tan asin acos atan atan2)
1212

1313
#c,z

kernel/aarch64/Kernel_generic.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE generic/sub_kernel.c)
88
set(sub_C_KERNEL_SOURCE generic/sub_kernel.c)
99
set(sub_Z_KERNEL_SOURCE generic/sub_kernel.c)
1010

11+
set(sqr_S_KERNEL_SOURCE generic/sqr_kernel.c)
12+
set(sqr_D_KERNEL_SOURCE generic/sqr_kernel.c)
13+
1114
set(pow_S_KERNEL_SOURCE generic/pow_kernel.c)
1215
set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
1316

kernel/arm/Kernel_generic.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE generic/sub_kernel.c)
88
set(sub_C_KERNEL_SOURCE generic/sub_kernel.c)
99
set(sub_Z_KERNEL_SOURCE generic/sub_kernel.c)
1010

11+
set(sqr_S_KERNEL_SOURCE generic/sqr_kernel.c)
12+
set(sqr_D_KERNEL_SOURCE generic/sqr_kernel.c)
13+
1114
set(pow_S_KERNEL_SOURCE generic/pow_kernel.c)
1215
set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
1316

kernel/generic/Kernel_generic.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/sub_kernel.c)
88
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/sub_kernel.c)
99
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/sub_kernel.c)
1010

11+
set(sqr_S_KERNEL_SOURCE ${OpenVML_ARCH}/sqr_kernel.c)
12+
set(sqr_D_KERNEL_SOURCE ${OpenVML_ARCH}/sqr_kernel.c)
13+
1114
set(pow_S_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)
1215
set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)
1316
#set(pow_C_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)

kernel/x86_64/Kernel_generic.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE generic/sub_kernel.c)
88
set(sub_C_KERNEL_SOURCE generic/sub_kernel.c)
99
set(sub_Z_KERNEL_SOURCE generic/sub_kernel.c)
1010

11+
set(sqr_S_KERNEL_SOURCE generic/sqr_kernel.c)
12+
set(sqr_D_KERNEL_SOURCE generic/sqr_kernel.c)
13+
1114
set(pow_S_KERNEL_SOURCE generic/pow_kernel.c)
1215
set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
1316

kernel/x86_64/Kernel_haswell.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
1616
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
1717
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
1818

19+
set(sqr_S_KERNEL_SOURCE ${OpenVML_ARCH}/ssqr_kernel_avx.c)
20+
set(sqr_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsqr_kernel_avx.c)
21+
1922
set(pow_S_KERNEL_SOURCE ${OpenVML_ARCH}/spow_kernel_avx.c)
2023
set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/dpow_kernel_avx.c)
2124

kernel/x86_64/Kernel_sandybridge.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
1616
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
1717
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
1818

19+
set(sqr_S_KERNEL_SOURCE ${OpenVML_ARCH}/ssqr_kernel_avx.c)
20+
set(sqr_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsqr_kernel_avx.c)
21+
1922
set(pow_S_KERNEL_SOURCE ${OpenVML_ARCH}/spow_kernel_avx.c)
2023
set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/dpow_kernel_avx.c)
2124

kernel/x86_64/dsqr_kernel_avx.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/* * Copyright (c) 2014, 2015 Zhang Xianyi
2+
* All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without modification,
5+
* are permitted provided that the following conditions are met:
6+
*
7+
* * Redistributions of source code must retain the above copyright notice, this
8+
* list of conditions and the following disclaimer.
9+
*
10+
* * Redistributions in binary form must reproduce the above copyright notice, this
11+
* list of conditions and the following disclaimer in the documentation and/or
12+
* other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15+
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18+
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21+
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
*/
25+
26+
#include "openvml_kernel.h"
27+
28+
#include <immintrin.h>
29+
30+
/*
 * AVX square kernel operating on doubles: y[i] = a[i] * a[i] for the first
 * COMPSIZE*n elements.
 *
 * b, z and other_params belong to the common kernel signature and are not
 * read or written here.
 */
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
  VMLLONG blocks=(COMPSIZE*n) >> 5;   /* full groups of 32 elements */
  VMLLONG tail=(COMPSIZE*n) & 0x1f;   /* elements left after the full groups */
  int k;

  /*
   * One pass handles 32 doubles as eight 4-wide vectors. Every load is
   * issued before the first store of the pass.
   */
  for(; blocks>0; blocks--){
    __m256d v0=_mm256_loadu_pd(a);
    __m256d v1=_mm256_loadu_pd(a+4);
    __m256d v2=_mm256_loadu_pd(a+8);
    __m256d v3=_mm256_loadu_pd(a+12);
    __m256d v4=_mm256_loadu_pd(a+16);
    __m256d v5=_mm256_loadu_pd(a+20);
    __m256d v6=_mm256_loadu_pd(a+24);
    __m256d v7=_mm256_loadu_pd(a+28);

    v0=_mm256_mul_pd(v0, v0);
    v1=_mm256_mul_pd(v1, v1);
    v2=_mm256_mul_pd(v2, v2);
    v3=_mm256_mul_pd(v3, v3);
    v4=_mm256_mul_pd(v4, v4);
    v5=_mm256_mul_pd(v5, v5);
    v6=_mm256_mul_pd(v6, v6);
    v7=_mm256_mul_pd(v7, v7);

    _mm256_storeu_pd(y, v0);
    _mm256_storeu_pd(y+4, v1);
    _mm256_storeu_pd(y+8, v2);
    _mm256_storeu_pd(y+12, v3);
    _mm256_storeu_pd(y+16, v4);
    _mm256_storeu_pd(y+20, v5);
    _mm256_storeu_pd(y+24, v6);
    _mm256_storeu_pd(y+28, v7);

    a+=32;
    y+=32;
  }

  /* Scalar loop for whatever did not fill a whole 32-element pass. */
  k=0;
  while(k<tail){
    y[k]=a[k]*a[k];
    k++;
  }
}
78+

kernel/x86_64/sqr_kernel.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/* * Copyright (c) 2014, 2015 Zhang Xianyi
2+
* All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without modification,
5+
* are permitted provided that the following conditions are met:
6+
*
7+
* * Redistributions of source code must retain the above copyright notice, this
8+
* list of conditions and the following disclaimer.
9+
*
10+
* * Redistributions in binary form must reproduce the above copyright notice, this
11+
* list of conditions and the following disclaimer in the documentation and/or
12+
* other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15+
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18+
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21+
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
*/
25+
26+
#include "openvml_kernel.h"
27+
28+
/*
 * Scalar square kernel: y[i] = a[i] * a[i] for i in [0, COMPSIZE*n).
 *
 * b, z and other_params belong to the common kernel signature and are not
 * read or written here.
 */
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
  VMLLONG idx=0;

  while(idx<COMPSIZE*n){
    const VML_FLOAT x=a[idx];
    y[idx]=x*x;
    idx++;
  }
}

kernel/x86_64/ssqr_kernel_avx.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/* * Copyright (c) 2014, 2015 Zhang Xianyi
2+
* All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without modification,
5+
* are permitted provided that the following conditions are met:
6+
*
7+
* * Redistributions of source code must retain the above copyright notice, this
8+
* list of conditions and the following disclaimer.
9+
*
10+
* * Redistributions in binary form must reproduce the above copyright notice, this
11+
* list of conditions and the following disclaimer in the documentation and/or
12+
* other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15+
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18+
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21+
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
*/
25+
26+
#include "openvml_kernel.h"
27+
28+
#include <immintrin.h>
29+
30+
/*
 * AVX square kernel operating on floats: y[i] = a[i] * a[i] for the first
 * COMPSIZE*n elements.
 *
 * b, z and other_params belong to the common kernel signature and are not
 * read or written here.
 */
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
  VMLLONG blocks=(COMPSIZE*n) >> 5;   /* full groups of 32 elements */
  VMLLONG tail=(COMPSIZE*n) & 0x1f;   /* elements left after the full groups */
  int k;

  /*
   * One pass handles 32 floats as four 8-wide vectors. Every load is
   * issued before the first store of the pass.
   */
  for(; blocks>0; blocks--){
    __m256 v0=_mm256_loadu_ps(a);
    __m256 v1=_mm256_loadu_ps(a+8);
    __m256 v2=_mm256_loadu_ps(a+16);
    __m256 v3=_mm256_loadu_ps(a+24);

    v0=_mm256_mul_ps(v0, v0);
    v1=_mm256_mul_ps(v1, v1);
    v2=_mm256_mul_ps(v2, v2);
    v3=_mm256_mul_ps(v3, v3);

    _mm256_storeu_ps(y, v0);
    _mm256_storeu_ps(y+8, v1);
    _mm256_storeu_ps(y+16, v2);
    _mm256_storeu_ps(y+24, v3);

    a+=32;
    y+=32;
  }

  /* Scalar loop for whatever did not fill a whole 32-element pass. */
  k=0;
  while(k<tail){
    y[k]=a[k]*a[k];
    k++;
  }
}
64+

reference/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
set(OpenVML_REF_SRC
22
vadd.c
33
vsub.c
4+
vsqr.c
45
vpow.c
56
vpowx.c
67
vpow2o3.c
@@ -27,4 +28,4 @@ if(NOT MSVC)
2728
target_link_libraries(${OpenVML_LIBNAME}_ref m)
2829
endif()
2930

30-
target_compile_definitions(${OpenVML_LIBNAME}_ref PUBLIC openvml_EXPORTS)
31+
target_compile_definitions(${OpenVML_LIBNAME}_ref PUBLIC openvml_EXPORTS)

0 commit comments

Comments
 (0)