@@ -384,8 +384,8 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
384
384
UNUSED (blocklen );
385
385
386
386
#if defined(__ARM_FEATURE_SVE )
387
- if (svcntw () == 8 ) {
388
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
387
+ if (ggml_sve_cnt_b == QK8_0 ) {
388
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
389
389
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
390
390
}
391
391
#endif
@@ -496,8 +496,8 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
496
496
UNUSED (blocklen );
497
497
498
498
#if defined(__ARM_FEATURE_SVE )
499
- if (svcntw () == 8 ) {
500
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
499
+ if (ggml_sve_cnt_b == QK8_0 ) {
500
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
501
501
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
502
502
}
503
503
#endif
@@ -614,7 +614,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
614
614
UNUSED (blocklen );
615
615
616
616
#if defined(__ARM_FEATURE_SVE ) && ! ((defined(_MSC_VER )) && ! defined(__clang__ ))
617
- if (svcntw () == 8 ) {
617
+ if (ggml_sve_cnt_b == QK8_0 ) {
618
618
const void * b_ptr = vx ;
619
619
const void * a_ptr = vy ;
620
620
float * res_ptr = s ;
@@ -680,12 +680,12 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
680
680
return ;
681
681
}
682
682
else if (ggml_cpu_has_neon () && ggml_cpu_has_matmul_int8 ()) {
683
- GGML_ASSERT ((ggml_cpu_has_sve () && (svcntw () == 8 )) &&
683
+ GGML_ASSERT ((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
684
684
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
685
685
"performance" );
686
686
}
687
687
else if (ggml_cpu_has_neon ()) {
688
- GGML_ASSERT (((ggml_cpu_has_sve () && (svcntw () == 8 )) || ggml_cpu_has_matmul_int8 ()) &&
688
+ GGML_ASSERT (((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) || ggml_cpu_has_matmul_int8 ()) &&
689
689
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
690
690
"quantization format for optimal performance" );
691
691
}
@@ -745,8 +745,8 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
745
745
UNUSED (blocklen );
746
746
747
747
#if defined(__ARM_FEATURE_SVE ) && defined(__ARM_FEATURE_MATMUL_INT8 )
748
- if (svcntw () == 8 ) {
749
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
748
+ if (ggml_sve_cnt_b == QK8_0 ) {
749
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
750
750
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
751
751
}
752
752
#endif
@@ -1266,8 +1266,8 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
1266
1266
UNUSED (blocklen );
1267
1267
1268
1268
#if defined(__ARM_FEATURE_SVE ) && defined(__ARM_FEATURE_MATMUL_INT8 )
1269
- if (svcntw () == 8 ) {
1270
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
1269
+ if (ggml_sve_cnt_b == QK8_0 ) {
1270
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
1271
1271
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
1272
1272
}
1273
1273
#endif
@@ -1728,7 +1728,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
1728
1728
UNUSED (blocklen );
1729
1729
1730
1730
#if defined(__ARM_FEATURE_SVE ) && defined(__ARM_FEATURE_MATMUL_INT8 ) && ! ((defined(_MSC_VER )) && ! defined(__clang__ ))
1731
- if (svcntw () == 8 ) {
1731
+ if (ggml_sve_cnt_b == QK8_0 ) {
1732
1732
const void * b_ptr = vx ;
1733
1733
const void * a_ptr = vy ;
1734
1734
float * res_ptr = s ;
@@ -2139,12 +2139,12 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
2139
2139
return ;
2140
2140
}
2141
2141
else if (ggml_cpu_has_neon () && ggml_cpu_has_matmul_int8 ()) {
2142
- GGML_ASSERT ((ggml_cpu_has_sve () && (svcntw () == 8 )) &&
2142
+ GGML_ASSERT ((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
2143
2143
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
2144
2144
"performance" );
2145
2145
}
2146
2146
else if (ggml_cpu_has_neon ()) {
2147
- GGML_ASSERT (((ggml_cpu_has_sve () && (svcntw () == 8 )) || ggml_cpu_has_matmul_int8 ()) &&
2147
+ GGML_ASSERT (((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) || ggml_cpu_has_matmul_int8 ()) &&
2148
2148
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
2149
2149
"quantization format for optimal performance" );
2150
2150
}
0 commit comments