Skip to content

Commit 76614f3

Browse files
jdomkedomke
and
domke
authored
ggml : reading the runtime sve config of the cpu (#8709)
* ggml : reading the runtime sve config of the cpu * change to one time init to prevent performance drop * prefix variable to avoid possible conflicts * revert xxhash fix and add brackets --------- Co-authored-by: domke <[email protected]>
1 parent b72c20b commit 76614f3

File tree

5 files changed

+30
-16
lines changed

5 files changed

+30
-16
lines changed

ggml/src/ggml-aarch64.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,8 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
384384
UNUSED(blocklen);
385385

386386
#if defined(__ARM_FEATURE_SVE)
387-
if (svcntw() == 8) {
388-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
387+
if (ggml_sve_cnt_b == QK8_0) {
388+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
389389
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
390390
}
391391
#endif
@@ -496,8 +496,8 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
496496
UNUSED(blocklen);
497497

498498
#if defined(__ARM_FEATURE_SVE)
499-
if (svcntw() == 8) {
500-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
499+
if (ggml_sve_cnt_b == QK8_0) {
500+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
501501
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
502502
}
503503
#endif
@@ -614,7 +614,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
614614
UNUSED(blocklen);
615615

616616
#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
617-
if (svcntw() == 8) {
617+
if (ggml_sve_cnt_b == QK8_0) {
618618
const void * b_ptr = vx;
619619
const void * a_ptr = vy;
620620
float * res_ptr = s;
@@ -680,12 +680,12 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
680680
return;
681681
}
682682
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
683-
GGML_ASSERT((ggml_cpu_has_sve() && (svcntw() == 8)) &&
683+
GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
684684
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
685685
"performance");
686686
}
687687
else if (ggml_cpu_has_neon()) {
688-
GGML_ASSERT(((ggml_cpu_has_sve() && (svcntw() == 8)) || ggml_cpu_has_matmul_int8()) &&
688+
GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
689689
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
690690
"quantization format for optimal performance");
691691
}
@@ -745,8 +745,8 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
745745
UNUSED(blocklen);
746746

747747
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
748-
if (svcntw() == 8) {
749-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
748+
if (ggml_sve_cnt_b == QK8_0) {
749+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
750750
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
751751
}
752752
#endif
@@ -1266,8 +1266,8 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
12661266
UNUSED(blocklen);
12671267

12681268
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
1269-
if (svcntw() == 8) {
1270-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
1269+
if (ggml_sve_cnt_b == QK8_0) {
1270+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
12711271
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
12721272
}
12731273
#endif
@@ -1728,7 +1728,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
17281728
UNUSED(blocklen);
17291729

17301730
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
1731-
if (svcntw() == 8) {
1731+
if (ggml_sve_cnt_b == QK8_0) {
17321732
const void * b_ptr = vx;
17331733
const void * a_ptr = vy;
17341734
float * res_ptr = s;
@@ -2139,12 +2139,12 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
21392139
return;
21402140
}
21412141
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
2142-
GGML_ASSERT((ggml_cpu_has_sve() && (svcntw() == 8)) &&
2142+
GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
21432143
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
21442144
"performance");
21452145
}
21462146
else if (ggml_cpu_has_neon()) {
2147-
GGML_ASSERT(((ggml_cpu_has_sve() && (svcntw() == 8)) || ggml_cpu_has_matmul_int8()) &&
2147+
GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
21482148
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
21492149
"quantization format for optimal performance");
21502150
}

ggml/src/ggml-impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ extern "C" {
143143

144144
#if defined(__ARM_FEATURE_SVE)
145145
#include <arm_sve.h>
146+
#include <sys/prctl.h>
146147
#endif
147148

148149
// 16-bit float

ggml/src/ggml-quants.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3818,7 +3818,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
38183818
float sumf = 0;
38193819

38203820
#if defined(__ARM_FEATURE_SVE)
3821-
if (svcntb() == QK8_0) {
3821+
if (ggml_sve_cnt_b == QK8_0) {
38223822
const svbool_t ptrueh = svptrue_pat_b8(SV_VL16);
38233823
const svbool_t ptruel = svnot_b_z(svptrue_b8(), ptrueh);
38243824

@@ -5303,7 +5303,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
53035303
float sumf = 0;
53045304

53055305
#if defined(__ARM_FEATURE_SVE)
5306-
if (svcntb() == QK8_0) {
5306+
if (ggml_sve_cnt_b == QK8_0) {
53075307
svfloat32_t sumv0 = svdup_n_f32(0.0f);
53085308
svfloat32_t sumv1 = svdup_n_f32(0.0f);
53095309

ggml/src/ggml-quants.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ void iq2xs_free_impl(enum ggml_type type);
127127
void iq3xs_init_impl(int grid_size);
128128
void iq3xs_free_impl(int grid_size);
129129

130+
#if defined(__ARM_FEATURE_SVE)
131+
extern int ggml_sve_cnt_b;
132+
#endif
133+
130134
#ifdef __cplusplus
131135
}
132136
#endif

ggml/src/ggml.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
#include <unistd.h>
3838
#endif
3939

40+
#if defined(__ARM_FEATURE_SVE)
41+
int ggml_sve_cnt_b = 0;
42+
#endif
4043
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
4144
#undef GGML_USE_LLAMAFILE
4245
#endif
@@ -3558,6 +3561,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
35583561

35593562
GGML_ASSERT_ALIGNED(ctx->mem_buffer);
35603563

3564+
#if defined(__ARM_FEATURE_SVE)
3565+
if (!ggml_sve_cnt_b) {
3566+
ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
3567+
}
3568+
#endif
3569+
35613570
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
35623571

35633572
ggml_critical_section_end();

0 commit comments

Comments
 (0)