
Make ARMV7 compile with xcode and add a CI job for it #2537

Merged
10 commits merged on Apr 2, 2020
6 changes: 6 additions & 0 deletions .travis.yml
@@ -180,6 +180,12 @@ matrix:
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"

+ - <<: *test-macos
+ osx_image: xcode10.1
+ env:
+ - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
+ - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
+ - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
# whitelist
branches:
only:
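The new matrix entry mirrors the arm64 macOS job directly above it: the same Xcode 10.1 toolchain and iOS 12.1 SDK, but targeting armv7, adding -mno-thumb, lowering the deployment target to iOS 5.1, and building with TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1.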
4 changes: 2 additions & 2 deletions driver/level2/gemv_thread.c
@@ -72,9 +72,9 @@
defined __BORLANDC__ )
# define thread_local __declspec(thread)
/* note that ICC (linux) and Clang are covered by __GNUC__ */
-# elif defined __GNUC__ || \
+# elif (defined __GNUC__ || \
defined __SUNPRO_C || \
-defined __xlC__
+defined __xlC__) && !defined(__APPLE__)
# define thread_local __thread
# else
# define UNSAFE
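For context, a minimal standalone sketch (not the OpenBLAS source) of what the added !defined(__APPLE__) guard does: Apple's clang defines __GNUC__, so without the guard it would select the __thread branch, and with it the preprocessing falls through to the UNSAFE case. The first branch below is a hypothetical stand-in for the MSVC/Borland case that is truncated in the diff.

/*
 * Minimal sketch, not the OpenBLAS source: it reproduces the #elif chain from
 * the hunk above so the effect of !defined(__APPLE__) can be checked in
 * isolation. The first branch is a hypothetical stand-in for the
 * MSVC/Borland case shown truncated in the diff.
 */
#include <stdio.h>

#if defined(_MSC_VER) || defined(__BORLANDC__)
# define thread_local __declspec(thread)
# define TLS_PATH "__declspec(thread)"
#elif (defined __GNUC__ || \
       defined __SUNPRO_C || \
       defined __xlC__) && !defined(__APPLE__)
# define thread_local __thread
# define TLS_PATH "__thread"
#else
# define UNSAFE
# define TLS_PATH "UNSAFE (no thread-local storage keyword)"
#endif

int main(void) {
    /* Apple clang now reports the UNSAFE path; Linux gcc/clang still report __thread. */
    printf("thread_local path: %s\n", TLS_PATH);
    return 0;
}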
60 changes: 30 additions & 30 deletions kernel/arm/nrm2_vfpv3.S
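All the changes in this file follow one pattern: labels of the form KERNEL_*_\@, which rely on GNU as expanding the \@ pseudo-variable to a per-invocation counter inside .macro bodies, are replaced by numeric local labels (1: and 2:, branched to as 1f and 2f), with the old names kept as comments. Numeric local labels are accepted by both GNU as and the clang integrated assembler that Xcode uses, which is presumably why they are needed for the armv7/Xcode build this PR enables.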
@@ -61,20 +61,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vldmia.f64 X!, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
vabs.f64 d4, d4
vcmpe.f64 d0, d4 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f64 d2 , d4, d0 // scale >= x ? x / scale
vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
vdiv.f64 d2 , d0, d4 // scale / x
vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f64 d0 , d4 // scale = x

-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */

.endm

@@ -124,20 +124,20 @@ KERNEL_S1_NEXT:
vldmia.f32 X!, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
vabs.f32 s4, s4
vcmpe.f32 s0, s4 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f32 s2 , s4, s0 // scale >= x ? x / scale
vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
vdiv.f32 s2 , s0, s4 // scale / x
vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f32 s0 , s4 // scale = x

-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */

.endm

@@ -195,37 +195,37 @@ KERNEL_S1_NEXT:

vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
vabs.f64 d4, d4
vcmpe.f64 d0, d4 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f64 d2 , d4, d0 // scale >= x ? x / scale
vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
vdiv.f64 d2 , d0, d4 // scale / x
vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f64 d0 , d4 // scale = x

-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */

vcmpe.f64 d5, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_END_\@
+beq 2f /* KERNEL_F1_END_\@ */
vabs.f64 d5, d5
vcmpe.f64 d0, d5 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f64 d2 , d5, d0 // scale >= x ? x / scale
vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_END_\@
+bge 2f /* KERNEL_F1_END_\@ */
vdiv.f64 d2 , d0, d5 // scale / x
vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f64 d0 , d5 // scale = x

-KERNEL_F1_END_\@:
+2: /* KERNEL_F1_END_\@: */


.endm
@@ -253,37 +253,37 @@ KERNEL_F1_END_\@:

vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_NEXT_\@
+beq 1f /* KERNEL_S1_NEXT_\@ */
vabs.f64 d4, d4
vcmpe.f64 d0, d4 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f64 d2 , d4, d0 // scale >= x ? x / scale
vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_NEXT_\@
+bge 1f /* KERNEL_S1_NEXT_\@ */
vdiv.f64 d2 , d0, d4 // scale / x
vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f64 d0 , d4 // scale = x

-KERNEL_S1_NEXT_\@:
+1: /* KERNEL_S1_NEXT_\@: */

vcmpe.f64 d5, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_END_\@
+beq 2f /* KERNEL_S1_END_\@ */
vabs.f64 d5, d5
vcmpe.f64 d0, d5 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f64 d2 , d5, d0 // scale >= x ? x / scale
vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_END_\@
+bge 2f /* KERNEL_S1_END_\@ */
vdiv.f64 d2 , d0, d5 // scale / x
vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f64 d0 , d5 // scale = x

-KERNEL_S1_END_\@:
+2: /* KERNEL_S1_END_\@: */

add X, X, INC_X

@@ -298,37 +298,37 @@ KERNEL_S1_END_\@:

vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
vabs.f32 s4, s4
vcmpe.f32 s0, s4 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f32 s2 , s4, s0 // scale >= x ? x / scale
vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
vdiv.f32 s2 , s0, s4 // scale / x
vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f32 s0 , s4 // scale = x

-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */

vcmpe.f32 s5, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_END_\@
+beq 2f /* KERNEL_F1_END_\@ */
vabs.f32 s5, s5
vcmpe.f32 s0, s5 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f32 s2 , s5, s0 // scale >= x ? x / scale
vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_END_\@
+bge 2f /* KERNEL_F1_END_\@ */
vdiv.f32 s2 , s0, s5 // scale / x
vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f32 s0 , s5 // scale = x

-KERNEL_F1_END_\@:
+2: /* KERNEL_F1_END_\@: */


.endm
@@ -354,37 +354,37 @@ KERNEL_F1_END_\@:

vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_NEXT_\@
+beq 1f /* KERNEL_S1_NEXT_\@ */
vabs.f32 s4, s4
vcmpe.f32 s0, s4 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f32 s2 , s4, s0 // scale >= x ? x / scale
vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_NEXT_\@
+bge 1f /* KERNEL_S1_NEXT_\@ */
vdiv.f32 s2 , s0, s4 // scale / x
vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f32 s0 , s4 // scale = x

-KERNEL_S1_NEXT_\@:
+1: /* KERNEL_S1_NEXT_\@: */

vcmpe.f32 s5, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_END_\@
+beq 2f /* KERNEL_S1_END_\@ */
vabs.f32 s5, s5
vcmpe.f32 s0, s5 // compare with scale
vmrs APSR_nzcv, fpscr
vdivge.f32 s2 , s5, s0 // scale >= x ? x / scale
vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_END_\@
+bge 2f /* KERNEL_S1_END_\@ */
vdiv.f32 s2 , s0, s5 // scale / x
vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
vmov.f32 s0 , s5 // scale = x

-KERNEL_S1_END_\@:
+2: /* KERNEL_S1_END_\@: */

add X, X, INC_X
