Skip to content

Commit 2739064

Browse files
committed
silencing overflows in MDCT and FFT
1 parent ae33218 commit 2739064

File tree

6 files changed

+59
-44
lines changed

6 files changed

+59
-44
lines changed

celt/_kiss_fft_guts.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@
5858
# define S_MUL(a,b) MULT16_32_Q15(b, a)
5959

6060
# define C_MUL(m,a,b) \
61-
do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
62-
(m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
61+
do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
62+
(m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
6363

6464
# define C_MULC(m,a,b) \
65-
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
66-
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
65+
do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
66+
(m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
6767

6868
# define C_MULBYSCALAR( c, s ) \
6969
do{ (c).r = S_MUL( (c).r , s ) ;\
@@ -77,17 +77,17 @@
7777
DIVSCALAR( (c).i , div); }while (0)
7878

7979
#define C_ADD( res, a,b)\
80-
do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
80+
do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
8181
}while(0)
8282
#define C_SUB( res, a,b)\
83-
do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
83+
do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
8484
}while(0)
8585
#define C_ADDTO( res , a)\
86-
do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
86+
do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
8787
}while(0)
8888

8989
/* Subtract the complex value `a` from the complex value `res` in place:
 * res.r = res.r - a.r and res.i = res.i - a.i, both through SUB32_ovflw.
 * The real part used to go through an add macro, so the macro added the
 * real components instead of subtracting them; both components now
 * subtract, matching C_SUB.
 * Both arguments are expanded more than once, so they must be free of
 * side effects. */
#define C_SUBFROM( res , a)\
do {(res).r = SUB32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
}while(0)
9292

9393
#if defined(OPUS_ARM_INLINE_ASM)

celt/arch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ static OPUS_INLINE int celt_isnan(float x)
186186

187187
#define NEG16(x) (-(x))
188188
#define NEG32(x) (-(x))
189+
#define NEG32_ovflw(x) (-(x))
189190
#define EXTRACT16(x) (x)
190191
#define EXTEND32(x) (x)
191192
#define SHR16(a,shift) (a)
@@ -209,6 +210,8 @@ static OPUS_INLINE int celt_isnan(float x)
209210
#define SUB16(a,b) ((a)-(b))
210211
#define ADD32(a,b) ((a)+(b))
211212
#define SUB32(a,b) ((a)-(b))
213+
#define ADD32_ovflw(a,b) ((a)+(b))
214+
#define SUB32_ovflw(a,b) ((a)-(b))
212215
#define MULT16_16_16(a,b) ((a)*(b))
213216
#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
214217
#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))

celt/fixed_debug.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ extern opus_int64 celt_mips;
5959
#define SHR(a,b) SHR32(a,b)
6060
#define PSHR(a,b) PSHR32(a,b)
6161

62+
/** Add two 32-bit values, ignore any overflows */
63+
#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
64+
/** Subtract two 32-bit values, ignore any overflows */
65+
#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
66+
/** Negate a 32-bit value, ignore any overflows: the negation is applied to
    the opus_uint32 cast of the argument and the result is converted back to
    opus_val32. Also adds 2 to celt_mips. */
#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(-(opus_uint32)(a)))
67+
6268
static OPUS_INLINE short NEG16(int x)
6369
{
6470
int res;

celt/fixed_generic.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@
117117
/** Subtract two 32-bit values */
118118
#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
119119

120+
/** Add two 32-bit values, ignore any overflows */
121+
#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
122+
/** Subtract two 32-bit values, ignore any overflows */
123+
#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
124+
/** Negate a 32-bit value, ignore any overflows: the negation is applied to
    the opus_uint32 cast of the argument and the result is converted back to
    opus_val32. */
#define NEG32_ovflw(a) ((opus_val32)(-(opus_uint32)(a)))
125+
120126
/** 16x16 multiplication where the result fits in 16 bits */
121127
#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
122128

celt/kiss_fft.c

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ static void kf_bfly2(
8282
C_SUB( Fout2[0] , Fout[0] , t );
8383
C_ADDTO( Fout[0] , t );
8484

85-
t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
86-
t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
85+
t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
86+
t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
8787
C_SUB( Fout2[1] , Fout[1] , t );
8888
C_ADDTO( Fout[1] , t );
8989

@@ -92,8 +92,8 @@ static void kf_bfly2(
9292
C_SUB( Fout2[2] , Fout[2] , t );
9393
C_ADDTO( Fout[2] , t );
9494

95-
t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
96-
t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
95+
t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
96+
t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
9797
C_SUB( Fout2[3] , Fout[3] , t );
9898
C_ADDTO( Fout[3] , t );
9999
Fout += 8;
@@ -126,10 +126,10 @@ static void kf_bfly4(
126126
C_ADDTO( *Fout , scratch1 );
127127
C_SUB( scratch1 , Fout[1] , Fout[3] );
128128

129-
Fout[1].r = scratch0.r + scratch1.i;
130-
Fout[1].i = scratch0.i - scratch1.r;
131-
Fout[3].r = scratch0.r - scratch1.i;
132-
Fout[3].i = scratch0.i + scratch1.r;
129+
Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
130+
Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
131+
Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
132+
Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
133133
Fout+=4;
134134
}
135135
} else {
@@ -160,10 +160,10 @@ static void kf_bfly4(
160160
tw3 += fstride*3;
161161
C_ADDTO( *Fout , scratch[3] );
162162

163-
Fout[m].r = scratch[5].r + scratch[4].i;
164-
Fout[m].i = scratch[5].i - scratch[4].r;
165-
Fout[m3].r = scratch[5].r - scratch[4].i;
166-
Fout[m3].i = scratch[5].i + scratch[4].r;
163+
Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
164+
Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
165+
Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
166+
Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
167167
++Fout;
168168
}
169169
}
@@ -212,18 +212,18 @@ static void kf_bfly3(
212212
tw1 += fstride;
213213
tw2 += fstride*2;
214214

215-
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
216-
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
215+
Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
216+
Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
217217

218218
C_MULBYSCALAR( scratch[0] , epi3.i );
219219

220220
C_ADDTO(*Fout,scratch[3]);
221221

222-
Fout[m2].r = Fout[m].r + scratch[0].i;
223-
Fout[m2].i = Fout[m].i - scratch[0].r;
222+
Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
223+
Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
224224

225-
Fout[m].r -= scratch[0].i;
226-
Fout[m].i += scratch[0].r;
225+
Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
226+
Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
227227

228228
++Fout;
229229
} while(--k);
@@ -282,22 +282,22 @@ static void kf_bfly5(
282282
C_ADD( scratch[8],scratch[2],scratch[3]);
283283
C_SUB( scratch[9],scratch[2],scratch[3]);
284284

285-
Fout0->r += scratch[7].r + scratch[8].r;
286-
Fout0->i += scratch[7].i + scratch[8].i;
285+
Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
286+
Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
287287

288-
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
289-
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
288+
scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
289+
scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
290290

291-
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
292-
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
291+
scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
292+
scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
293293

294294
C_SUB(*Fout1,scratch[5],scratch[6]);
295295
C_ADD(*Fout4,scratch[5],scratch[6]);
296296

297-
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
298-
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
299-
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
300-
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
297+
scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
298+
scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
299+
scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
300+
scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
301301

302302
C_ADD(*Fout2,scratch[11],scratch[12]);
303303
C_SUB(*Fout3,scratch[11],scratch[12]);

celt/mdct.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
270270
int rev;
271271
kiss_fft_scalar yr, yi;
272272
rev = *bitrev++;
273-
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
274-
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
273+
yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
274+
yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
275275
/* We swap real and imag because we use an FFT instead of an IFFT. */
276276
yp[2*rev+1] = yr;
277277
yp[2*rev] = yi;
@@ -301,8 +301,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
301301
t0 = t[i];
302302
t1 = t[N4+i];
303303
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
304-
yr = S_MUL(re,t0) + S_MUL(im,t1);
305-
yi = S_MUL(re,t1) - S_MUL(im,t0);
304+
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
305+
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
306306
/* We swap real and imag because we're using an FFT instead of an IFFT. */
307307
re = yp1[1];
308308
im = yp1[0];
@@ -312,8 +312,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
312312
t0 = t[(N4-i-1)];
313313
t1 = t[(N2-i-1)];
314314
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
315-
yr = S_MUL(re,t0) + S_MUL(im,t1);
316-
yi = S_MUL(re,t1) - S_MUL(im,t0);
315+
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
316+
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
317317
yp1[0] = yr;
318318
yp0[1] = yi;
319319
yp0 += 2;
@@ -333,8 +333,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
333333
kiss_fft_scalar x1, x2;
334334
x1 = *xp1;
335335
x2 = *yp1;
336-
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
337-
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
336+
*yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
337+
*xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
338338
wp1++;
339339
wp2--;
340340
}

0 commit comments

Comments
 (0)