diff --git a/.travis.yml b/.travis.yml
index 0f20aef5c1..2d82f8812d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -180,6 +180,12 @@ matrix:
         - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
         - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
 
+    - <<: *test-macos
+      osx_image: xcode10.1
+      env:
+        - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
+        - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
+        - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
 # whitelist
 branches:
   only:
diff --git a/driver/level2/gemv_thread.c b/driver/level2/gemv_thread.c
index d577403142..0d8c6b005c 100644
--- a/driver/level2/gemv_thread.c
+++ b/driver/level2/gemv_thread.c
@@ -72,9 +72,9 @@
        defined __BORLANDC__ )
 # define thread_local __declspec(thread)
 /* note that ICC (linux) and Clang are covered by __GNUC__ */
-# elif defined __GNUC__ || \
+# elif (defined __GNUC__ || \
        defined __SUNPRO_C || \
-       defined __xlC__
+       defined __xlC__) && !defined(__APPLE__)
 # define thread_local __thread
 # else
 # define UNSAFE
diff --git a/kernel/arm/nrm2_vfpv3.S b/kernel/arm/nrm2_vfpv3.S
index 7be1e977e4..82ae5e8d41 100644
--- a/kernel/arm/nrm2_vfpv3.S
+++ b/kernel/arm/nrm2_vfpv3.S
@@ -61,20 +61,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vldmia.f64	X!, { d4 }
 	vcmpe.f64	d4, d6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_F1_NEXT_\@
+	beq		1f /* KERNEL_F1_NEXT_\@ */
 	vabs.f64	d4, d4
 	vcmpe.f64	d0, d4			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f64	d2 , d4, d0		// scale >= x ? x / scale
 	vmlage.f64	d1 , d2 , d2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_F1_NEXT_\@
+	bge		1f /* KERNEL_F1_NEXT_\@ */
 	vdiv.f64	d2 , d0, d4		// scale / x
 	vmul.f64	d2 , d2, d2		// ( scale / x ) * ( scale / x )
 	vmul.f64	d3 , d1, d2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f64	d1 , d3, d7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f64	d0 , d4			// scale = x
 
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 
 .endm
 
@@ -124,20 +124,20 @@ KERNEL_S1_NEXT:
 	vldmia.f32	X!, { s4 }
 	vcmpe.f32	s4, s6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_F1_NEXT_\@
+	beq		1f /* KERNEL_F1_NEXT_\@ */
 	vabs.f32	s4, s4
 	vcmpe.f32	s0, s4			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f32	s2 , s4, s0		// scale >= x ? x / scale
 	vmlage.f32	s1 , s2 , s2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_F1_NEXT_\@
+	bge		1f /* KERNEL_F1_NEXT_\@ */
 	vdiv.f32	s2 , s0, s4		// scale / x
 	vmul.f32	s2 , s2, s2		// ( scale / x ) * ( scale / x )
 	vmul.f32	s3 , s1, s2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f32	s1 , s3, s7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f32	s0 , s4			// scale = x
 
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 
 .endm
 
@@ -195,37 +195,37 @@ KERNEL_S1_NEXT:
 
 	vcmpe.f64	d4, d6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_F1_NEXT_\@
+	beq		1f /* KERNEL_F1_NEXT_\@ */
 	vabs.f64	d4, d4
 	vcmpe.f64	d0, d4			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f64	d2 , d4, d0		// scale >= x ? x / scale
 	vmlage.f64	d1 , d2 , d2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_F1_NEXT_\@
+	bge		1f /* KERNEL_F1_NEXT_\@ */
 	vdiv.f64	d2 , d0, d4		// scale / x
 	vmul.f64	d2 , d2, d2		// ( scale / x ) * ( scale / x )
 	vmul.f64	d3 , d1, d2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f64	d1 , d3, d7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f64	d0 , d4			// scale = x
 
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 
 	vcmpe.f64	d5, d6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_F1_END_\@
+	beq		2f /* KERNEL_F1_END_\@ */
 	vabs.f64	d5, d5
 	vcmpe.f64	d0, d5			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f64	d2 , d5, d0		// scale >= x ? x / scale
 	vmlage.f64	d1 , d2 , d2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_F1_END_\@
+	bge		2f /* KERNEL_F1_END_\@ */
 	vdiv.f64	d2 , d0, d5		// scale / x
 	vmul.f64	d2 , d2, d2		// ( scale / x ) * ( scale / x )
 	vmul.f64	d3 , d1, d2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f64	d1 , d3, d7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f64	d0 , d5			// scale = x
 
-KERNEL_F1_END_\@:
+2: /* KERNEL_F1_END_\@: */
 
 .endm
 
@@ -253,37 +253,37 @@ KERNEL_F1_END_\@:
 
 	vcmpe.f64	d4, d6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_S1_NEXT_\@
+	beq		1f /* KERNEL_S1_NEXT_\@ */
 	vabs.f64	d4, d4
 	vcmpe.f64	d0, d4			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f64	d2 , d4, d0		// scale >= x ? x / scale
 	vmlage.f64	d1 , d2 , d2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_S1_NEXT_\@
+	bge		1f /* KERNEL_S1_NEXT_\@ */
 	vdiv.f64	d2 , d0, d4		// scale / x
 	vmul.f64	d2 , d2, d2		// ( scale / x ) * ( scale / x )
 	vmul.f64	d3 , d1, d2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f64	d1 , d3, d7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f64	d0 , d4			// scale = x
 
-KERNEL_S1_NEXT_\@:
+1: /* KERNEL_S1_NEXT_\@: */
 
 	vcmpe.f64	d5, d6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_S1_END_\@
+	beq		2f /* KERNEL_S1_END_\@ */
 	vabs.f64	d5, d5
 	vcmpe.f64	d0, d5			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f64	d2 , d5, d0		// scale >= x ? x / scale
 	vmlage.f64	d1 , d2 , d2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_S1_END_\@
+	bge		2f /* KERNEL_S1_END_\@ */
 	vdiv.f64	d2 , d0, d5		// scale / x
 	vmul.f64	d2 , d2, d2		// ( scale / x ) * ( scale / x )
 	vmul.f64	d3 , d1, d2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f64	d1 , d3, d7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f64	d0 , d5			// scale = x
 
-KERNEL_S1_END_\@:
+2: /* KERNEL_S1_END_\@: */
 
 	add	X, X, INC_X
 
@@ -298,37 +298,37 @@ KERNEL_S1_END_\@:
 
 	vcmpe.f32	s4, s6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_F1_NEXT_\@
+	beq		1f /* KERNEL_F1_NEXT_\@ */
 	vabs.f32	s4, s4
 	vcmpe.f32	s0, s4			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f32	s2 , s4, s0		// scale >= x ? x / scale
 	vmlage.f32	s1 , s2 , s2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_F1_NEXT_\@
+	bge		1f /* KERNEL_F1_NEXT_\@ */
 	vdiv.f32	s2 , s0, s4		// scale / x
 	vmul.f32	s2 , s2, s2		// ( scale / x ) * ( scale / x )
 	vmul.f32	s3 , s1, s2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f32	s1 , s3, s7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f32	s0 , s4			// scale = x
 
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 
 	vcmpe.f32	s5, s6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_F1_END_\@
+	beq		2f /* KERNEL_F1_END_\@ */
 	vabs.f32	s5, s5
 	vcmpe.f32	s0, s5			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f32	s2 , s5, s0		// scale >= x ? x / scale
 	vmlage.f32	s1 , s2 , s2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_F1_END_\@
+	bge		2f /* KERNEL_F1_END_\@ */
 	vdiv.f32	s2 , s0, s5		// scale / x
 	vmul.f32	s2 , s2, s2		// ( scale / x ) * ( scale / x )
 	vmul.f32	s3 , s1, s2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f32	s1 , s3, s7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f32	s0 , s5			// scale = x
 
-KERNEL_F1_END_\@:
+2: /* KERNEL_F1_END_\@: */
 
 .endm
 
@@ -354,37 +354,37 @@ KERNEL_F1_END_\@:
 
 	vcmpe.f32	s4, s6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_S1_NEXT_\@
+	beq		1f /* KERNEL_S1_NEXT_\@ */
 	vabs.f32	s4, s4
 	vcmpe.f32	s0, s4			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f32	s2 , s4, s0		// scale >= x ? x / scale
 	vmlage.f32	s1 , s2 , s2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_S1_NEXT_\@
+	bge		1f /* KERNEL_S1_NEXT_\@ */
 	vdiv.f32	s2 , s0, s4		// scale / x
 	vmul.f32	s2 , s2, s2		// ( scale / x ) * ( scale / x )
 	vmul.f32	s3 , s1, s2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f32	s1 , s3, s7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f32	s0 , s4			// scale = x
 
-KERNEL_S1_NEXT_\@:
+1: /* KERNEL_S1_NEXT_\@: */
 
 	vcmpe.f32	s5, s6			// compare with 0.0
 	vmrs		APSR_nzcv, fpscr
-	beq		KERNEL_S1_END_\@
+	beq		2f /* KERNEL_S1_END_\@ */
 	vabs.f32	s5, s5
 	vcmpe.f32	s0, s5			// compare with scale
 	vmrs		APSR_nzcv, fpscr
 	vdivge.f32	s2 , s5, s0		// scale >= x ? x / scale
 	vmlage.f32	s1 , s2 , s2		// ssq += ( x/scale ) * ( x/scale )
-	bge		KERNEL_S1_END_\@
+	bge		2f /* KERNEL_S1_END_\@ */
 	vdiv.f32	s2 , s0, s5		// scale / x
 	vmul.f32	s2 , s2, s2		// ( scale / x ) * ( scale / x )
 	vmul.f32	s3 , s1, s2		// ssq * ( scale / x ) * ( scale / x )
 	vadd.f32	s1 , s3, s7		// ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 	vmov.f32	s0 , s5			// scale = x
 
-KERNEL_S1_END_\@:
+2: /* KERNEL_S1_END_\@: */
 
 	add	X, X, INC_X
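
A note on the driver/level2/gemv_thread.c hunk: the __thread keyword is rejected by Apple clang for the old iOS deployment target exercised by the new Travis job (-miphoneos-version-min=5.1), so the __GNUC__ branch now excludes __APPLE__ and such builds fall through to the UNSAFE path. The following is a minimal, condensed sketch of the resulting detection logic, not the full OpenBLAS preprocessor block (which also covers the C11 and MSVC/Borland spellings); the demo variable "calls" is hypothetical:

    /* sketch.c - condensed illustration of the thread_local fallback */
    #include <stdio.h>

    #if (defined __GNUC__ || defined __SUNPRO_C || defined __xlC__) \
        && !defined(__APPLE__)
    # define thread_local __thread
    #else
    # define thread_local           /* no TLS: callers must serialize */
    # define UNSAFE
    #endif

    static thread_local int calls;  /* per-thread only when TLS exists */

    int main(void) {
        calls++;
        printf("calls = %d\n", calls);
        return 0;
    }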
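
A note on the kernel/arm/nrm2_vfpv3.S hunks: GNU as expands \@ to a per-invocation counter, which keeps labels defined inside a .macro unique across expansions, but the Xcode integrated assembler used for the new armv7 job does not accept \@, hence the switch to numeric local labels. A branch to 1f resolves to the nearest following 1:, and the same number may be reused by every expansion, so no uniquifying suffix is needed; the old label names are kept as comments. Below is a hypothetical stand-alone illustration in 32-bit ARM inline assembly (built with, e.g., clang -target armv7a-linux-gnueabi; clamp_nonneg is not an OpenBLAS routine):

    /* local_labels.c - numeric local labels, accepted by both GNU as
       and the Clang integrated assembler */
    int clamp_nonneg(int x) {
        __asm__ ("cmp   %0, #0\n\t"
                 "bge   1f\n\t"     /* forward branch to nearest "1:" */
                 "mov   %0, #0\n"
                 "1:"               /* local label: safe to repeat */
                 : "+r"(x)
                 :
                 : "cc");           /* cmp overwrites condition flags */
        return x;
    }

    int main(void) {
        return clamp_nonneg(-5);    /* clamps -5 to 0, so exits with 0 */
    }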