Skip to content

Commit c79a9bd

Browse files
committed
Support 32-bit window and comb filter
1 parent 38e535a commit c79a9bd

17 files changed

+160
-81
lines changed

celt/arch.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,20 @@ typedef opus_val16 opus_res;
175175

176176
#ifdef ENABLE_QEXT
177177
typedef opus_val32 celt_coef;
178+
#define COEF_ONE Q31ONE
179+
#define MULT_COEF_32(a, b) MULT32_32_Q31(a,b)
180+
#define MAC_COEF_32_ARM(c, a, b) ADD32((c), MULT32_32_Q32(a,b))
181+
#define MULT_COEF(a, b) MULT32_32_Q31(a,b)
182+
#define MULT_COEF_TAPS(a, b) SHL32(MULT16_16(a,b), 1)
183+
#define COEF2VAL16(x) EXTRACT16(SHR32(x, 16))
178184
#else
179185
typedef opus_val16 celt_coef;
186+
#define COEF_ONE Q15ONE
187+
#define MULT_COEF_32(a, b) MULT16_32_Q15(a,b)
188+
/* Multiply-accumulate with a celt_coef coefficient: forwards to MAC16_32_Q16.
   Arguments are accumulator-first (c, a, b), the same order used by
   MAC16_32_Q16 and by the other MAC_COEF_32_ARM definitions. */
#define MAC_COEF_32_ARM(c, a, b) MAC16_32_Q16(c,a,b)
189+
#define MULT_COEF(a, b) MULT16_16_Q15(a,b)
190+
#define MULT_COEF_TAPS(a, b) MULT16_16_P15(a,b)
191+
#define COEF2VAL16(x) (x)
180192
#endif
181193

182194
#define celt_isnan(x) 0
@@ -265,6 +277,8 @@ static OPUS_INLINE int celt_isnan(float x)
265277

266278
#define Q15ONE 1.0f
267279
#define Q31ONE 1.0f
280+
#define COEF_ONE 1.0f
281+
#define COEF2VAL16(x) (x)
268282

269283
#define NORM_SCALING 1.f
270284

@@ -321,6 +335,7 @@ static OPUS_INLINE int celt_isnan(float x)
321335

322336
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
323337
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
338+
#define MAC_COEF_32_ARM(c,a,b) ((c)+(a)*(b))
324339

325340
#define MULT16_16_Q11_32(a,b) ((a)*(b))
326341
#define MULT16_16_Q11(a,b) ((a)*(b))
@@ -332,6 +347,10 @@ static OPUS_INLINE int celt_isnan(float x)
332347
#define MULT16_16_P14(a,b) ((a)*(b))
333348
#define MULT16_32_P16(a,b) ((a)*(b))
334349

350+
#define MULT_COEF_32(a, b) ((a)*(b))
351+
#define MULT_COEF(a, b) ((a)*(b))
352+
#define MULT_COEF_TAPS(a, b) ((a)*(b))
353+
335354
#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b))
336355
#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b))
337356

celt/celt.c

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,15 @@ int resampling_factor(opus_int32 rate)
8989
return ret;
9090
}
9191

92+
9293
#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
9394
/* This version should be faster on ARM */
9495
#ifdef OPUS_ARM_ASM
9596
#ifndef NON_STATIC_COMB_FILTER_CONST_C
9697
static
9798
#endif
9899
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
99-
opus_val16 g10, opus_val16 g11, opus_val16 g12)
100+
celt_coef g10, celt_coef g11, celt_coef g12)
100101
{
101102
opus_val32 x0, x1, x2, x3, x4;
102103
int i;
@@ -108,33 +109,33 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
108109
{
109110
opus_val32 t;
110111
x0=SHL32(x[i-T+2],1);
111-
t = MAC16_32_Q16(x[i], g10, x2);
112-
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
113-
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
112+
t = MAC_COEF_32_ARM(x[i], g10, x2);
113+
t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3));
114+
t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4));
114115
t = SATURATE(t, SIG_SAT);
115116
y[i] = t;
116117
x4=SHL32(x[i-T+3],1);
117-
t = MAC16_32_Q16(x[i+1], g10, x1);
118-
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
119-
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
118+
t = MAC_COEF_32_ARM(x[i+1], g10, x1);
119+
t = MAC_COEF_32_ARM(t, g11, ADD32(x0,x2));
120+
t = MAC_COEF_32_ARM(t, g12, ADD32(x4,x3));
120121
t = SATURATE(t, SIG_SAT);
121122
y[i+1] = t;
122123
x3=SHL32(x[i-T+4],1);
123-
t = MAC16_32_Q16(x[i+2], g10, x0);
124-
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
125-
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
124+
t = MAC_COEF_32_ARM(x[i+2], g10, x0);
125+
t = MAC_COEF_32_ARM(t, g11, ADD32(x4,x1));
126+
t = MAC_COEF_32_ARM(t, g12, ADD32(x3,x2));
126127
t = SATURATE(t, SIG_SAT);
127128
y[i+2] = t;
128129
x2=SHL32(x[i-T+5],1);
129-
t = MAC16_32_Q16(x[i+3], g10, x4);
130-
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
131-
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
130+
t = MAC_COEF_32_ARM(x[i+3], g10, x4);
131+
t = MAC_COEF_32_ARM(t, g11, ADD32(x3,x0));
132+
t = MAC_COEF_32_ARM(t, g12, ADD32(x2,x1));
132133
t = SATURATE(t, SIG_SAT);
133134
y[i+3] = t;
134135
x1=SHL32(x[i-T+6],1);
135-
t = MAC16_32_Q16(x[i+4], g10, x3);
136-
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
137-
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
136+
t = MAC_COEF_32_ARM(x[i+4], g10, x3);
137+
t = MAC_COEF_32_ARM(t, g11, ADD32(x2,x4));
138+
t = MAC_COEF_32_ARM(t, g12, ADD32(x1,x0));
138139
t = SATURATE(t, SIG_SAT);
139140
y[i+4] = t;
140141
}
@@ -143,9 +144,9 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
143144
{
144145
opus_val32 t;
145146
x0=SHL32(x[i-T+2],1);
146-
t = MAC16_32_Q16(x[i], g10, x2);
147-
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
148-
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
147+
t = MAC_COEF_32_ARM(x[i], g10, x2);
148+
t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3));
149+
t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4));
149150
t = SATURATE(t, SIG_SAT);
150151
y[i] = t;
151152
x4=x3;
@@ -160,7 +161,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
160161
static
161162
#endif
162163
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
163-
opus_val16 g10, opus_val16 g11, opus_val16 g12)
164+
celt_coef g10, celt_coef g11, celt_coef g12)
164165
{
165166
opus_val32 x0, x1, x2, x3, x4;
166167
int i;
@@ -172,9 +173,9 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
172173
{
173174
x0=x[i-T+2];
174175
y[i] = x[i]
175-
+ MULT16_32_Q15(g10,x2)
176-
+ MULT16_32_Q15(g11,ADD32(x1,x3))
177-
+ MULT16_32_Q15(g12,ADD32(x0,x4));
176+
+ MULT_COEF_32(g10,x2)
177+
+ MULT_COEF_32(g11,ADD32(x1,x3))
178+
+ MULT_COEF_32(g12,ADD32(x0,x4));
178179
y[i] = SATURATE(y[i], SIG_SAT);
179180
x4=x3;
180181
x3=x2;
@@ -189,11 +190,11 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
189190
#ifndef OVERRIDE_comb_filter
190191
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
191192
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
192-
const opus_val16 *window, int overlap, int arch)
193+
const celt_coef *window, int overlap, int arch)
193194
{
194195
int i;
195196
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
196-
opus_val16 g00, g01, g02, g10, g11, g12;
197+
celt_coef g00, g01, g02, g10, g11, g12;
197198
opus_val32 x0, x1, x2, x3, x4;
198199
static const opus_val16 gains[3][3] = {
199200
{QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
@@ -211,12 +212,12 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
211212
to have them be at least 2 to avoid processing garbage data. */
212213
T0 = IMAX(T0, COMBFILTER_MINPERIOD);
213214
T1 = IMAX(T1, COMBFILTER_MINPERIOD);
214-
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
215-
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
216-
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
217-
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
218-
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
219-
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
215+
g00 = MULT_COEF_TAPS(g0, gains[tapset0][0]);
216+
g01 = MULT_COEF_TAPS(g0, gains[tapset0][1]);
217+
g02 = MULT_COEF_TAPS(g0, gains[tapset0][2]);
218+
g10 = MULT_COEF_TAPS(g1, gains[tapset1][0]);
219+
g11 = MULT_COEF_TAPS(g1, gains[tapset1][1]);
220+
g12 = MULT_COEF_TAPS(g1, gains[tapset1][2]);
220221
x1 = x[-T1+1];
221222
x2 = x[-T1 ];
222223
x3 = x[-T1-1];
@@ -226,16 +227,16 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
226227
overlap=0;
227228
for (i=0;i<overlap;i++)
228229
{
229-
opus_val16 f;
230+
celt_coef f;
230231
x0=x[i-T1+2];
231-
f = MULT16_16_Q15(window[i],window[i]);
232+
f = MULT_COEF(window[i],window[i]);
232233
y[i] = x[i]
233-
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
234-
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
235-
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
236-
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
237-
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
238-
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
234+
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g00),x[i-T0])
235+
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
236+
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
237+
+ MULT_COEF_32(MULT_COEF(f,g10),x2)
238+
+ MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3))
239+
+ MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4));
239240
y[i] = SATURATE(y[i], SIG_SAT);
240241
x4=x3;
241242
x3=x2;

celt/celt.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "entenc.h"
4242
#include "entdec.h"
4343
#include "arch.h"
44+
#include "kiss_fft.h"
4445

4546
#ifdef ENABLE_DEEP_PLC
4647
#include "lpcnet.h"
@@ -236,7 +237,7 @@ void celt_preemphasis(const opus_res * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTR
236237

237238
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
238239
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
239-
const opus_val16 *window, int overlap, int arch);
240+
const celt_coef *window, int overlap, int arch);
240241

241242
void init_caps(const CELTMode *m,int *cap,int LM,int C);
242243

celt/celt_decoder.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -535,8 +535,8 @@ static void prefilter_and_fold(CELTDecoder * OPUS_RESTRICT st, int N)
535535
for (i=0;i<overlap/2;i++)
536536
{
537537
decode_mem[c][DECODE_BUFFER_SIZE-N+i] =
538-
MULT16_32_Q15(mode->window[i], etmp[overlap-1-i])
539-
+ MULT16_32_Q15(mode->window[overlap-i-1], etmp[i]);
538+
MULT16_32_Q15(COEF2VAL16(mode->window[i]), etmp[overlap-1-i])
539+
+ MULT16_32_Q15 (COEF2VAL16(mode->window[overlap-i-1]), etmp[i]);
540540
}
541541
} while (++c<CC);
542542
}
@@ -692,7 +692,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
692692
} else {
693693
int exc_length;
694694
/* Pitch-based PLC */
695-
const opus_val16 *window;
695+
const celt_coef *window;
696696
opus_val16 *exc;
697697
opus_val16 fade = Q15ONE;
698698
int pitch_index;
@@ -880,7 +880,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
880880
for (i=0;i<overlap;i++)
881881
{
882882
opus_val16 tmp_g = Q15ONE
883-
- MULT16_16_Q15(window[i], Q15ONE-ratio);
883+
- MULT16_16_Q15(COEF2VAL16(window[i]), Q15ONE-ratio);
884884
buf[DECODE_BUFFER_SIZE-N+i] =
885885
MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
886886
}

celt/celt_lpc.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ void celt_iir(const opus_val32 *_x,
277277
int _celt_autocorr(
278278
const opus_val16 *x, /* in: [0...n-1] samples x */
279279
opus_val32 *ac, /* out: [0...lag-1] ac values */
280-
const opus_val16 *window,
280+
const celt_coef *window,
281281
int overlap,
282282
int lag,
283283
int n,
@@ -302,8 +302,9 @@ int _celt_autocorr(
302302
xx[i] = x[i];
303303
for (i=0;i<overlap;i++)
304304
{
305-
xx[i] = MULT16_16_Q15(x[i],window[i]);
306-
xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
305+
opus_val16 w = COEF2VAL16(window[i]);
306+
xx[i] = MULT16_16_Q15(x[i],w);
307+
xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
307308
}
308309
xptr = xx;
309310
}

celt/celt_lpc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,6 @@ void celt_iir(const opus_val32 *x,
6161
int arch);
6262

6363
int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
64-
const opus_val16 *window, int overlap, int lag, int n, int arch);
64+
const celt_coef *window, int overlap, int lag, int n, int arch);
6565

6666
#endif /* PLC_H */

celt/dump_modes/dump_modes.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,19 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
9999

100100
fprintf(file, "#ifndef DEF_WINDOW%d\n", mode->overlap);
101101
fprintf(file, "#define DEF_WINDOW%d\n", mode->overlap);
102-
fprintf (file, "static const opus_val16 window%d[%d] = {\n", mode->overlap, mode->overlap);
102+
fprintf (file, "static const celt_coef window%d[%d] = {\n", mode->overlap, mode->overlap);
103+
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
104+
fprintf(file, "#ifdef ENABLE_QEXT\n");
105+
for (j=0;j<mode->overlap;j++)
106+
fprintf (file, WORD32 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
107+
fprintf(file, "#else\n");
108+
for (j=0;j<mode->overlap;j++)
109+
fprintf (file, WORD16 ",%c", COEF16(mode->window[j], 16),(j+6)%5==0?'\n':' ');
110+
fprintf(file, "#endif\n");
111+
#else
103112
for (j=0;j<mode->overlap;j++)
104113
fprintf (file, WORD16 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
114+
#endif
105115
fprintf (file, "};\n");
106116
fprintf(file, "#endif\n");
107117
fprintf(file, "\n");

celt/fixed_debug.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ extern opus_int64 celt_mips;
4343

4444
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
4545
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
46+
#define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
4647

4748
/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */
4849
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))

celt/fixed_generic.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@
7171
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
7272
#endif
7373

74+
/** 32x32 multiplication, followed by a 32-bit shift right. Result fits in 32 bits */
75+
#if OPUS_FAST_INT64
76+
#define MULT32_32_Q32(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),32))
77+
#else
78+
#define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
79+
#endif
80+
7481
/** Compile-time conversion of float constant to 16-bit value */
7582
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
7683

celt/mdct.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ void clt_mdct_clear(mdct_lookup *l, int arch)
120120
/* Forward MDCT trashes the input array */
121121
#ifndef OVERRIDE_clt_mdct_forward
122122
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
123-
const opus_val16 *window, int overlap, int shift, int stride, int arch)
123+
const celt_coef *window, int overlap, int shift, int stride, int arch)
124124
{
125125
int i;
126126
int N, N2, N4;
@@ -159,13 +159,13 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
159159
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
160160
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
161161
kiss_fft_scalar * OPUS_RESTRICT yp = f;
162-
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
163-
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
162+
const celt_coef * OPUS_RESTRICT wp1 = window+(overlap>>1);
163+
const celt_coef * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
164164
for(i=0;i<((overlap+3)>>2);i++)
165165
{
166166
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
167-
*yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
168-
*yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
167+
*yp++ = S_MUL(xp1[N2], *wp2) + S_MUL(*xp2, *wp1);
168+
*yp++ = S_MUL(*xp1, *wp1) - S_MUL(xp2[-N2], *wp2);
169169
xp1+=2;
170170
xp2-=2;
171171
wp1+=2;
@@ -184,8 +184,8 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
184184
for(;i<N4;i++)
185185
{
186186
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
187-
*yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
188-
*yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
187+
*yp++ = -S_MUL(xp1[-N2], *wp1) + S_MUL(*xp2, *wp2);
188+
*yp++ = S_MUL(*xp1, *wp2) + S_MUL(xp2[N2], *wp1);
189189
xp1+=2;
190190
xp2-=2;
191191
wp1+=2;
@@ -258,7 +258,7 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
258258

259259
#ifndef OVERRIDE_clt_mdct_backward
260260
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
261-
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
261+
const celt_coef * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
262262
{
263263
int i;
264264
int N, N2, N4;
@@ -346,16 +346,16 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
346346
{
347347
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
348348
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
349-
const opus_val16 * OPUS_RESTRICT wp1 = window;
350-
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
349+
const celt_coef * OPUS_RESTRICT wp1 = window;
350+
const celt_coef * OPUS_RESTRICT wp2 = window+overlap-1;
351351

352352
for(i = 0; i < overlap/2; i++)
353353
{
354354
kiss_fft_scalar x1, x2;
355355
x1 = *xp1;
356356
x2 = *yp1;
357-
*yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
358-
*xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
357+
*yp1++ = SUB32_ovflw(S_MUL(x2, *wp2), S_MUL(x1, *wp1));
358+
*xp1-- = ADD32_ovflw(S_MUL(x2, *wp1), S_MUL(x1, *wp2));
359359
wp1++;
360360
wp2--;
361361
}

0 commit comments

Comments
 (0)