Commit 5ab0773

ggerganov, hodlen (authored and committed)

ggml : restore vec dot stride arg names (ggml-org#5453)

1 parent 15cb4a4 · commit 5ab0773

File tree

ggml-quants.c: 1 file changed, 38 additions, 38 deletions
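The diff below is a mechanical rename: the stride parameters of the ggml_vec_dot_* functions go back to their original names bx and by (they had been changed to bbx and bby), and the local SIMD temporaries that were previously named bx and by become bx_0 and by_0 so they no longer collide with the restored parameter names. As a minimal illustration of the naming pattern, here is a hypothetical scalar stand-in (vec_dot_sketch and its element-stride reading of bx/by are assumptions for this note, not ggml code):

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for a ggml_vec_dot_* kernel, only to show the naming
 * convention restored by this commit: the stride parameters keep the short
 * names bx/by, so per-iteration temporaries use bx_0/by_0 instead of
 * re-declaring (and shadowing) the parameters. */
static void vec_dot_sketch(int n, float * restrict s,
                           const signed char * restrict vx, size_t bx,
                           const signed char * restrict vy, size_t by) {
    int sum = 0;
    for (int i = 0; i < n; ++i) {
        const int bx_0 = vx[i * bx];   /* element of x; this local was "bx" before the rename */
        const int by_0 = vy[i * by];   /* element of y; this local was "by" before the rename */
        sum += bx_0 * by_0;
    }
    *s = (float) sum;
}

int main(void) {
    const signed char x[4] = { 1, 2, 3, 4 };
    const signed char y[4] = { 5, 6, 7, 8 };
    float s = 0.0f;
    vec_dot_sketch(4, &s, x, 1, y, 1);
    printf("%f\n", s);   /* 1*5 + 2*6 + 3*7 + 4*8 = 70 */
    return 0;
}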
@@ -3855,7 +3855,7 @@ static inline __m128i get_scale_shuffle(int i) {
 }
 #endif
 
-void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bbx, const void * restrict vy, size_t bby, int nrc) {
+void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -3866,8 +3866,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
     assert(nrc == 1);
 #endif
     UNUSED(nrc);
-    UNUSED(bbx);
-    UNUSED(bby);
+    UNUSED(bx);
+    UNUSED(by);
     UNUSED(bs);
 
     const block_q4_0 * restrict x = vx;
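The UNUSED(...) statements above exist because the single-row (nrc == 1) path never reads the stride arguments; the macro keeps compilers from warning about unused parameters. ggml defines its own UNUSED/GGML_UNUSED macro; the usual shape of such a macro (assumed here, not copied from the tree) is a cast to void:

/* Assumed definition of an UNUSED-style macro: the void cast "uses" the
 * parameter without generating code, silencing -Wunused-parameter. */
#define UNUSED(x) (void)(x)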
@@ -4024,15 +4024,15 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 
         const __m128i tmp = _mm_loadu_si128((const __m128i *)x[i].qs);
 
-        __m128i bx = _mm_and_si128(lowMask, tmp);
-        __m128i by = _mm_loadu_si128((const __m128i *)y[i].qs);
-        bx = _mm_sub_epi8(bx, off);
-        const __m128i i32_0 = mul_sum_i8_pairs(bx, by);
+        __m128i bx_0 = _mm_and_si128(lowMask, tmp);
+        __m128i by_0 = _mm_loadu_si128((const __m128i *)y[i].qs);
+        bx_0 = _mm_sub_epi8(bx_0, off);
+        const __m128i i32_0 = mul_sum_i8_pairs(bx_0, by_0);
 
-        bx = _mm_and_si128(lowMask, _mm_srli_epi64(tmp, 4));
-        by = _mm_loadu_si128((const __m128i *)(y[i].qs + 16));
-        bx = _mm_sub_epi8(bx, off);
-        const __m128i i32_1 = mul_sum_i8_pairs(bx, by);
+        bx_0 = _mm_and_si128(lowMask, _mm_srli_epi64(tmp, 4));
+        by_0 = _mm_loadu_si128((const __m128i *)(y[i].qs + 16));
+        bx_0 = _mm_sub_epi8(bx_0, off);
+        const __m128i i32_1 = mul_sum_i8_pairs(bx_0, by_0);
 
         // Convert int32_t to float
         __m256 p = _mm256_cvtepi32_ps(MM256_SET_M128I(i32_0, i32_1));
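The SSE sequence above unpacks one q4_0 block before the signed 8-bit dot product: lowMask extracts the low nibbles, the 4-bit shift exposes the high nibbles, and subtracting off re-centers both to the signed range. A plain scalar sketch of the same per-block arithmetic (written for this note under those assumptions, not quoted from ggml-quants.c):

#include <stdint.h>

/* Scalar sketch of one q4_0 x q8_0 block: each byte of xq packs two 4-bit
 * quants; low nibbles map to elements 0..15, high nibbles to elements 16..31,
 * and both are re-centered by subtracting 8. The caller scales the result by
 * the two block deltas d_x * d_y. */
static int32_t q4_0_block_dot_sketch(const uint8_t *xq, const int8_t *yq) {
    int32_t sumi = 0;
    for (int j = 0; j < 16; ++j) {
        const int v0 = (xq[j] & 0x0F) - 8;   /* low nibble  -> element j      */
        const int v1 = (xq[j] >>   4) - 8;   /* high nibble -> element j + 16 */
        sumi += v0 * yq[j] + v1 * yq[j + 16];
    }
    return sumi;
}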
@@ -4222,7 +4222,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 #endif
 }
 
-void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bbx, const void * restrict vy, size_t bby, int nrc) {
+void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     const int qk = QK8_1;
     const int nb = n / qk;
 
@@ -4233,8 +4233,8 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
     assert(nrc == 1);
 #endif
     UNUSED(nrc);
-    UNUSED(bbx);
-    UNUSED(bby);
+    UNUSED(bx);
+    UNUSED(by);
     UNUSED(bs);
 
     const block_q4_1 * restrict x = vx;
@@ -4440,16 +4440,16 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 #endif
 }
 
-void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bbx, const void * restrict vy, size_t bby, int nrc) {
+void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     const int qk = QK8_0;
     const int nb = n / qk;
 
     assert(n % qk == 0);
     assert(qk == QK5_0);
     assert(nrc == 1);
     UNUSED(nrc);
-    UNUSED(bbx);
-    UNUSED(bby);
+    UNUSED(bx);
+    UNUSED(by);
     UNUSED(bs);
 
     const block_q5_0 * restrict x = vx;
@@ -4618,21 +4618,21 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
         /* Compute combined scale for the block */
         const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
 
-        __m256i bx = bytes_from_nibbles_32(x[i].qs);
+        __m256i bx_0 = bytes_from_nibbles_32(x[i].qs);
         const __m256i bxhi = bytes_from_bits_32(x[i].qh);
         __m128i bxhil = _mm256_castsi256_si128(bxhi);
         __m128i bxhih = _mm256_extractf128_si256(bxhi, 1);
         bxhil = _mm_andnot_si128(bxhil, mask);
         bxhih = _mm_andnot_si128(bxhih, mask);
-        __m128i bxl = _mm256_castsi256_si128(bx);
-        __m128i bxh = _mm256_extractf128_si256(bx, 1);
+        __m128i bxl = _mm256_castsi256_si128(bx_0);
+        __m128i bxh = _mm256_extractf128_si256(bx_0, 1);
         bxl = _mm_or_si128(bxl, bxhil);
         bxh = _mm_or_si128(bxh, bxhih);
-        bx = MM256_SET_M128I(bxh, bxl);
+        bx_0 = MM256_SET_M128I(bxh, bxl);
 
-        const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs);
 
-        const __m256 q = mul_sum_i8_pairs_float(bx, by);
+        const __m256 q = mul_sum_i8_pairs_float(bx_0, by_0);
 
         /* Multiply q with scale and accumulate */
         acc = _mm256_add_ps(_mm256_mul_ps(d, q), acc);
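The q5_0 path above reassembles 5-bit quants: bytes_from_nibbles_32 supplies the 4 low bits of each element, and the bits gathered from x[i].qh supply the 5th bit, which is OR'd in before the dot product. A scalar sketch of that reassembly (an illustration under the usual q5_0 layout, not quoted from ggml-quants.c):

#include <stdint.h>

/* Scalar sketch of one q5_0 x q8_0 block: the low 4 bits of each element come
 * from the packed nibbles in xq, the 5th bit comes from the 32-bit field xqh,
 * and the value is re-centered by subtracting 16. The caller scales the
 * result by d_x * d_y. */
static int32_t q5_0_block_dot_sketch(const uint8_t *xq, uint32_t xqh, const int8_t *yq) {
    int32_t sumi = 0;
    for (int j = 0; j < 16; ++j) {
        const int xh_0 = (int)((xqh >> (j +  0)) & 1) << 4;   /* 5th bit of element j      */
        const int xh_1 = (int)((xqh >> (j + 16)) & 1) << 4;   /* 5th bit of element j + 16 */
        const int v0 = ((xq[j] & 0x0F) | xh_0) - 16;
        const int v1 = ((xq[j] >>   4) | xh_1) - 16;
        sumi += v0 * yq[j] + v1 * yq[j + 16];
    }
    return sumi;
}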
@@ -4731,16 +4731,16 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 #endif
 }
 
-void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bbx, const void * restrict vy, size_t bby, int nrc) {
+void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     const int qk = QK8_1;
     const int nb = n / qk;
 
     assert(n % qk == 0);
     assert(qk == QK5_1);
     assert(nrc == 1);
     UNUSED(nrc);
-    UNUSED(bbx);
-    UNUSED(bby);
+    UNUSED(bx);
+    UNUSED(by);
     UNUSED(bs);
 
     const block_q5_1 * restrict x = vx;
@@ -4925,22 +4925,22 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 
         summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
 
-        __m256i bx = bytes_from_nibbles_32(x[i].qs);
+        __m256i bx_0 = bytes_from_nibbles_32(x[i].qs);
         const __m256i bxhi = bytes_from_bits_32(x[i].qh);
         __m128i bxhil = _mm256_castsi256_si128(bxhi);
         __m128i bxhih = _mm256_extractf128_si256(bxhi, 1);
         bxhil = _mm_and_si128(bxhil, mask);
         bxhih = _mm_and_si128(bxhih, mask);
-        __m128i bxl = _mm256_castsi256_si128(bx);
-        __m128i bxh = _mm256_extractf128_si256(bx, 1);
+        __m128i bxl = _mm256_castsi256_si128(bx_0);
+        __m128i bxh = _mm256_extractf128_si256(bx_0, 1);
         bxl = _mm_or_si128(bxl, bxhil);
         bxh = _mm_or_si128(bxh, bxhih);
-        bx = MM256_SET_M128I(bxh, bxl);
+        bx_0 = MM256_SET_M128I(bxh, bxl);
 
         const __m256 dy = _mm256_set1_ps(y[i].d);
-        const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs);
 
-        const __m256 q = mul_sum_us8_pairs_float(bx, by);
+        const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0);
 
         acc = _mm256_add_ps(_mm256_mul_ps(q, _mm256_mul_ps(dx, dy)), acc);
     }
@@ -5035,7 +5035,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 #endif
 }
 
-void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bbx, const void * restrict vy, size_t bby, int nrc) {
+void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     const int qk = QK8_0;
     const int nb = n / qk;
 

@@ -5046,8 +5046,8 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
50465046
assert(nrc == 1);
50475047
#endif
50485048
UNUSED(nrc);
5049-
UNUSED(bbx);
5050-
UNUSED(bby);
5049+
UNUSED(bx);
5050+
UNUSED(by);
50515051
UNUSED(bs);
50525052

50535053
const block_q8_0 * restrict x = vx;
@@ -5169,10 +5169,10 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 
     for (int i = 0; i < nb; i++) {
         // load elements
-        vint8m1_t bx = __riscv_vle8_v_i8m1(x[i].qs, vl);
-        vint8m1_t by = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t bx_0 = __riscv_vle8_v_i8m1(x[i].qs, vl);
+        vint8m1_t by_0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
 
-        vint16m2_t vw_mul = __riscv_vwmul_vv_i16m2(bx, by, vl);
+        vint16m2_t vw_mul = __riscv_vwmul_vv_i16m2(bx_0, by_0, vl);
 
         vint32m1_t v_zero = __riscv_vmv_v_x_i32m1(0, vl);
         vint32m1_t v_sum = __riscv_vwredsum_vs_i16m2_i32m1(vw_mul, v_zero, vl);
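The RVV sequence above loads both q8_0 blocks as int8 vectors, performs a widening 8-bit by 8-bit multiply into 16-bit lanes, and then a widening reduction into a single 32-bit sum. A plain-C sketch of the per-block arithmetic it vectorizes (illustrative only, not the ggml scalar path verbatim):

#include <stdint.h>

/* Scalar sketch of one q8_0 x q8_0 block dot product: int8 products fit in
 * int16 (|a*b| <= 16384), and the reduction accumulates them in 32 bits.
 * The caller scales the result by the two block deltas d_x * d_y. */
static int32_t q8_0_block_dot_sketch(const int8_t *xq, const int8_t *yq, int qk) {
    int32_t sum = 0;
    for (int j = 0; j < qk; ++j) {
        sum += (int16_t)(xq[j] * yq[j]);   /* vwmul: widening multiply  */
    }
    return sum;                            /* vwredsum: reduce to int32 */
}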
