diff --git a/Makefile.mips b/Makefile.mips
deleted file mode 100644
index bc12ba86e..000000000
--- a/Makefile.mips
+++ /dev/null
@@ -1,169 +0,0 @@
-#################### COMPILE OPTIONS #######################
-
-# Uncomment this for fixed-point build
-FIXED_POINT=1
-
-# It is strongly recommended to uncomment one of these
-# VAR_ARRAYS: Use C99 variable-length arrays for stack allocation
-# USE_ALLOCA: Use alloca() for stack allocation
-# If none is defined, then the fallback is a non-threadsafe global array
-CFLAGS := -DUSE_ALLOCA $(CFLAGS)
-#CFLAGS := -DVAR_ARRAYS $(CFLAGS)
-
-# These options affect performance
-# HAVE_LRINTF: Use C99 intrinsics to speed up float-to-int conversion
-CFLAGS := -DHAVE_LRINTF $(CFLAGS)
-
-###################### END OF OPTIONS ######################
-
--include package_version
-
-include silk_sources.mk
-include celt_sources.mk
-include opus_sources.mk
-
-ifdef FIXED_POINT
-SILK_SOURCES += $(SILK_SOURCES_FIXED)
-else
-SILK_SOURCES += $(SILK_SOURCES_FLOAT)
-OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
-endif
-
-EXESUFFIX =
-LIBPREFIX = lib
-LIBSUFFIX = .a
-OBJSUFFIX = .o
-
-CC     = $(TOOLCHAIN_PREFIX)cc$(TOOLCHAIN_SUFFIX)
-AR     = $(TOOLCHAIN_PREFIX)ar
-RANLIB = $(TOOLCHAIN_PREFIX)ranlib
-CP     = $(TOOLCHAIN_PREFIX)cp
-
-cppflags-from-defines   = $(addprefix -D,$(1))
-cppflags-from-includes  = $(addprefix -I,$(1))
-ldflags-from-ldlibdirs  = $(addprefix -L,$(1))
-ldlibs-from-libs        = $(addprefix -l,$(1))
-
-WARNINGS = -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow
-
-CFLAGS  += -mips32r2 -mno-mips16 -std=gnu99 -O2 -g $(WARNINGS) -DENABLE_ASSERTIONS -DMIPSr1_ASM -DOPUS_BUILD -mdspr2 -march=74kc -mtune=74kc -mmt -mgp32
-
-CINCLUDES = include silk celt
-
-ifdef FIXED_POINT
-CFLAGS += -DFIXED_POINT=1 -DDISABLE_FLOAT_API
-CINCLUDES += silk/fixed
-else
-CINCLUDES += silk/float
-endif
-
-
-LIBS = m
-
-LDLIBDIRS = ./
-
-CFLAGS  += $(call cppflags-from-defines,$(CDEFINES))
-CFLAGS  += $(call cppflags-from-includes,$(CINCLUDES))
-LDFLAGS += $(call ldflags-from-ldlibdirs,$(LDLIBDIRS))
-LDLIBS  += $(call ldlibs-from-libs,$(LIBS))
-
-COMPILE.c.cmdline   = $(CC) -c $(CFLAGS) -o $@ $<
-LINK.o              = $(CC) $(LDPREFLAGS) $(LDFLAGS)
-LINK.o.cmdline      = $(LINK.o) $^ $(LDLIBS) -o $@$(EXESUFFIX)
-
-ARCHIVE.cmdline     = $(AR) $(ARFLAGS) $@ $^ && $(RANLIB) $@
-
-%$(OBJSUFFIX):%.c
-	$(COMPILE.c.cmdline)
-
-%$(OBJSUFFIX):%.cpp
-	$(COMPILE.cpp.cmdline)
-
-# Directives
-
-
-# Variable definitions
-LIB_NAME = opus
-TARGET = $(LIBPREFIX)$(LIB_NAME)$(LIBSUFFIX)
-
-SRCS_C = $(SILK_SOURCES) $(CELT_SOURCES) $(OPUS_SOURCES)
-
-OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(SRCS_C))
-
-OPUSDEMO_SRCS_C = src/opus_demo.c
-OPUSDEMO_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSDEMO_SRCS_C))
-
-TESTOPUSAPI_SRCS_C = tests/test_opus_api.c
-TESTOPUSAPI_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSAPI_SRCS_C))
-
-TESTOPUSDECODE_SRCS_C = tests/test_opus_decode.c
-TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
-
-TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
-TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))
-
-TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
-TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
-
-TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
-TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))
-
-OPUSCOMPARE_SRCS_C = src/opus_compare.c
-OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))
-
-TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding
-
-# Rules
-all: lib opus_demo opus_compare $(TESTS)
-
-lib: $(TARGET)
-
-check: all
-	for test in $(TESTS); do ./$$test; done
-
-$(TARGET): $(OBJS)
-	$(ARCHIVE.cmdline)
-
-opus_demo$(EXESUFFIX): $(OPUSDEMO_OBJS) $(TARGET)
-	$(LINK.o.cmdline)
-
-test_opus_api$(EXESUFFIX): $(TESTOPUSAPI_OBJS) $(TARGET)
-	$(LINK.o.cmdline)
-
-test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
-	$(LINK.o.cmdline)
-
-test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
-	$(LINK.o.cmdline)
-
-test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
-	$(LINK.o.cmdline)
-
-test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
-	$(LINK.o.cmdline)
-
-opus_compare$(EXESUFFIX): $(OPUSCOMPARE_OBJS)
-	$(LINK.o.cmdline)
-
-celt/celt.o: CFLAGS += -DPACKAGE_VERSION='$(PACKAGE_VERSION)'
-celt/celt.o: package_version
-
-package_version: force
-	@if [ -x ./update_version ]; then \
-		./update_version || true; \
-	elif [ ! -e ./package_version ]; then \
-		echo 'PACKAGE_VERSION="unknown"' > ./package_version; \
-	fi
-
-force:
-
-clean:
-	rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
-                test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
-                test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
-                test_opus_padding$(EXESUFFIX) \
-		$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
-                $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
-                $(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)
-
-.PHONY: all lib clean force check
diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h
index 89ccc8039..5b1bfbcfe 100644
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -102,7 +102,7 @@
 #if defined(OPUS_ARM_INLINE_EDSP)
 #include "arm/kiss_fft_armv5e.h"
 #endif
-#if defined(MIPSr1_ASM)
+#if defined(__mips_dsp)
 #include "mips/kiss_fft_mipsr1.h"
 #endif
 
diff --git a/celt/arch.h b/celt/arch.h
index a6055f403..dd095b218 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -121,7 +121,7 @@ void celt_fatal(const char *str, const char *file, int line)
 /* Set this if opus_int64 is a native type of the CPU. */
 /* Assume that all LP64 architectures have fast 64-bit types; also x86_64
    (which can be ILP32 for x32) and Win64 (which is LLP64). */
-#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
+#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) || defined (__mips)
 #define OPUS_FAST_INT64 1
 #else
 #define OPUS_FAST_INT64 0
diff --git a/celt/celt.c b/celt/celt.c
index 2235d46b8..8ca0e0baa 100644
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -54,7 +54,7 @@
 #define PACKAGE_VERSION "unknown"
 #endif
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/celt_mipsr1.h"
 #endif
 
diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
index 743b064e9..86d345335 100644
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -200,7 +200,7 @@
 /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
 #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
 
-#if defined(MIPSr1_ASM)
+#if defined(__mips_dsp)
 #include "mips/fixed_generic_mipsr1.h"
 #endif
 
diff --git a/celt/mdct.c b/celt/mdct.c
index f8483a2df..6812b8815 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -53,7 +53,7 @@
 #include "mathops.h"
 #include "stack_alloc.h"
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/mdct_mipsr1.h"
 #endif
 
diff --git a/celt/mips/celt_mipsr1.h b/celt/mips/celt_mipsr1.h
index d1b25c204..7fa8d4358 100644
--- a/celt/mips/celt_mipsr1.h
+++ b/celt/mips/celt_mipsr1.h
@@ -97,18 +97,17 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
    {
       opus_val16 f;
       opus_val32 res;
+      long long acc;
       f = MULT16_16_Q15(window[i],window[i]);
       x0= x[i-T1+2];
 
-      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
-
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
-
-      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+      acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
+      res = __builtin_mips_extr_w(acc, 15);
 
       y[i] = x[i] + res;
 
@@ -134,13 +133,14 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
    for (i=overlap;i<N;i++)
    {
       opus_val32 res;
+      long long acc;
       x0=x[i-T1+2];
 
-      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
+      acc = __builtin_mips_mult((int)g10, (int)x2);
+      acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1));
+      acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0));
+      res = __builtin_mips_extr_w(acc, 15);
 
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
-      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
       y[i] = x[i] + res;
       x4=x3;
       x3=x2;
diff --git a/celt/mips/fixed_generic_mipsr1.h b/celt/mips/fixed_generic_mipsr1.h
index 4a05efbf8..42f0e4047 100644
--- a/celt/mips/fixed_generic_mipsr1.h
+++ b/celt/mips/fixed_generic_mipsr1.h
@@ -35,92 +35,72 @@
 
 #undef MULT16_32_Q15_ADD
 static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b); /* Computes (a*b + c*d) >> 15; acc starts as the 64-bit product a*b */
+    acc = __builtin_mips_madd(acc, c, d); /* acc += c*d */
+    return __builtin_mips_extr_w(acc, 15); /* extract the word at a right shift of 15, as the former EXTR.W did */
 }
 
 #undef MULT16_32_Q15_SUB
 static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b); /* Computes (a*b - c*d) >> 15; acc starts as the 64-bit product a*b */
+    acc = __builtin_mips_msub(acc, c, d); /* acc -= c*d */
+    return __builtin_mips_extr_w(acc, 15); /* extract the word at a right shift of 15, as the former EXTR.W did */
 }
 
 #undef MULT16_16_Q15_ADD
 static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b); /* Same sequence as MULT16_32_Q15_ADD: start from the product a*b */
+    acc = __builtin_mips_madd(acc, c, d); /* accumulate c*d on top of it */
+    return __builtin_mips_extr_w(acc, 15); /* result is the sum shifted right by 15 */
 }
 
 #undef MULT16_16_Q15_SUB
 static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b); /* Same sequence as MULT16_32_Q15_SUB: start from the product a*b */
+    acc = __builtin_mips_msub(acc, c, d); /* subtract c*d from it */
+    return __builtin_mips_extr_w(acc, 15); /* result is the difference shifted right by 15 */
 }
 
 
 #undef MULT16_32_Q16
 static inline int MULT16_32_Q16(int a, int b)
 {
-    int c;
-    asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
-    return c;
+    long long acc = __builtin_mips_mult(a, b); /* 64-bit product a*b (replaces the MULT on $ac1) */
+    return __builtin_mips_extr_w(acc, 16); /* (a*b) >> 16 without rounding; MULT16_32_P16 is the rounding form */
 }
 
 #undef MULT16_32_P16
 static inline int MULT16_32_P16(int a, int b)
 {
-    int c;
-    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
-    return c;
+    long long acc = __builtin_mips_mult(a, b); /* 64-bit product a*b (replaces the MULT on $ac1) */
+    return __builtin_mips_extr_r_w(acc, 16); /* (a*b) >> 16 using the rounding extract (was EXTR_R.W) */
 }
 
 #undef MULT16_32_Q15
 static inline int MULT16_32_Q15(int a, int b)
 {
-    int c;
-    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
-    return c;
+    long long acc = __builtin_mips_mult(a, b); /* 64-bit product a*b (replaces the MULT on $ac1) */
+    return __builtin_mips_extr_w(acc, 15); /* (a*b) >> 15, extracted as a 32-bit word */
 }
 
 #undef MULT32_32_Q31
 static inline int MULT32_32_Q31(int a, int b)
 {
-    int r;
-    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
-    return r;
+    long long acc = __builtin_mips_mult(a, b); /* full 64-bit product of the two 32-bit operands */
+    return __builtin_mips_extr_w(acc, 31); /* (a*b) >> 31, extracted as a 32-bit word */
 }
 
 #undef PSHR32
 static inline int PSHR32(int a, int shift)
 {
-    int r;
-    asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
-    return r;
+    return __builtin_mips_shra_r_w(a, shift); /* rounding arithmetic right shift of a by a run-time amount (was SHRAV_R.W) */
 }
 
 #undef MULT16_16_P15
 static inline int MULT16_16_P15(int a, int b)
 {
-    int r;
-    asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
-    asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r):  "0" (r), "i"(15));
-    return r;
+    int r = a * b; /* plain 32-bit multiply; presumably a and b are 16-bit values so this cannot overflow - confirm callers */
+    return __builtin_mips_shra_r_w(r, 15); /* rounding arithmetic right shift by 15 (was SHRA_R.W) */
 }
 
 #endif /* CELT_FIXED_GENERIC_MIPSR1_H */
diff --git a/celt/mips/kiss_fft_mipsr1.h b/celt/mips/kiss_fft_mipsr1.h
index 400ca4de9..bdb5df804 100644
--- a/celt/mips/kiss_fft_mipsr1.h
+++ b/celt/mips/kiss_fft_mipsr1.h
@@ -37,20 +37,16 @@
 
 #undef S_MUL_ADD
 static inline int S_MUL_ADD(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b); /* Computes (a*b + c*d) >> 15; acc starts as the 64-bit product a*b */
+    acc = __builtin_mips_madd(acc, c, d); /* acc += c*d */
+    return __builtin_mips_extr_w(acc, 15); /* extract the word at a right shift of 15 */
 }
 
 #undef S_MUL_SUB
 static inline int S_MUL_SUB(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b); /* Computes (a*b - c*d) >> 15; acc starts as the 64-bit product a*b */
+    acc = __builtin_mips_msub(acc, c, d); /* acc -= c*d */
+    return __builtin_mips_extr_w(acc, 15); /* extract the word at a right shift of 15 */
 }
 
 #undef C_MUL
@@ -58,13 +54,12 @@ static inline int S_MUL_SUB(int a, int b, int c, int d) {
 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     kiss_fft_cpx m;
 
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
-
+    long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r); /* Complex product a*b; acc1 builds the real part, starting with a.r*b.r */
+    long long acc2 = __builtin_mips_mult((int)a.r, (int)b.i); /* acc2 builds the imaginary part, starting with a.r*b.i */
+    acc1 = __builtin_mips_msub(acc1, (int)a.i, (int)b.i); /* real: a.r*b.r - a.i*b.i */
+    acc2 = __builtin_mips_madd(acc2, (int)a.i, (int)b.r); /* imag: a.r*b.i + a.i*b.r */
+    m.r = __builtin_mips_extr_w(acc1, 15); /* both parts are shifted right by 15 on extraction */
+    m.i = __builtin_mips_extr_w(acc2, 15);
     return m;
 }
 #undef C_MULC
@@ -72,13 +67,12 @@ static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     kiss_fft_cpx m;
 
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
-
+    long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r); /* Product of a with the conjugate of b; acc1 builds the real part */
+    long long acc2 = __builtin_mips_mult((int)a.i, (int)b.r); /* acc2 builds the imaginary part, starting with a.i*b.r */
+    acc1 = __builtin_mips_madd(acc1, (int)a.i, (int)b.i); /* real: a.r*b.r + a.i*b.i */
+    acc2 = __builtin_mips_msub(acc2, (int)a.r, (int)b.i); /* imag: a.i*b.r - a.r*b.i */
+    m.r = __builtin_mips_extr_w(acc1, 15); /* both parts are shifted right by 15 on extraction */
+    m.i = __builtin_mips_extr_w(acc2, 15);
     return m;
 }
 
diff --git a/celt/mips/mdct_mipsr1.h b/celt/mips/mdct_mipsr1.h
index 7456c181a..c8accc093 100644
--- a/celt/mips/mdct_mipsr1.h
+++ b/celt/mips/mdct_mipsr1.h
@@ -55,10 +55,22 @@
 #include "mathops.h"
 #include "stack_alloc.h"
 
+static inline int S_MUL_ADD_PSR(int a, int b, int c, int d, int shift) { /* (a*b + c*d) >> (15+shift): S_MUL_ADD with an extra right shift */
+    long long acc = __builtin_mips_mult(a, b); /* acc = a*b as a 64-bit accumulator value */
+    acc = __builtin_mips_madd(acc, c, d); /* acc += c*d */
+    return __builtin_mips_extr_w(acc, 15+shift); /* NOTE(review): extract shift is presumably limited to 5 bits, so 15+shift should stay <= 31 - confirm */
+}
+
+static inline int S_MUL_SUB_PSR(int a, int b, int c, int d, int shift) { /* (a*b - c*d) >> (15+shift): S_MUL_SUB with an extra right shift */
+    long long acc = __builtin_mips_mult(a, b); /* acc = a*b as a 64-bit accumulator value */
+    acc = __builtin_mips_msub(acc, c, d); /* acc -= c*d */
+    return __builtin_mips_extr_w(acc, 15+shift); /* NOTE(review): extract shift is presumably limited to 5 bits, so 15+shift should stay <= 31 - confirm */
+}
+
 /* Forward MDCT trashes the input array */
 #define OVERRIDE_clt_mdct_forward
 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
-      const opus_val16 *window, int overlap, int shift, int stride, int arch)
+      const celt_coef *window, int overlap, int shift, int stride, int arch)
 {
    int i;
    int N, N2, N4;
@@ -66,16 +78,15 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
    VARDECL(kiss_fft_cpx, f2);
    const kiss_fft_state *st = l->kfft[shift];
    const kiss_twiddle_scalar *trig;
-   opus_val16 scale;
+   celt_coef scale;
 #ifdef FIXED_POINT
    /* Allows us to scale with MULT16_32_Q16(), which is faster than
       MULT16_32_Q15() on ARM. */
    int scale_shift = st->scale_shift-1;
+   int headroom;
 #endif
-
-    (void)arch;
-
    SAVE_STACK;
+   (void)arch;
    scale = st->scale;
 
    N = l->n;
@@ -98,8 +109,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
       const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
       const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
       kiss_fft_scalar * OPUS_RESTRICT yp = f;
-      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
-      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      const celt_coef * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const celt_coef * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
       for(i=0;i<((overlap+3)>>2);i++)
       {
          /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
@@ -123,7 +134,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
       for(;i<N4;i++)
       {
          /* Real part arranged as a-bR, Imag part arranged as -c-dR */
-          *yp++ =  S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
+          *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
           *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
          xp1+=2;
          xp2-=2;
@@ -135,6 +146,9 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
    {
       kiss_fft_scalar * OPUS_RESTRICT yp = f;
       const kiss_twiddle_scalar *t = &trig[0];
+#ifdef FIXED_POINT
+      opus_val32 maxval=1;
+#endif
       for(i=0;i<N4;i++)
       {
          kiss_fft_cpx yc;
@@ -144,20 +158,29 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
          t1 = t[N4+i];
          re = *yp++;
          im = *yp++;
-
          yr = S_MUL_SUB(re,t0,im,t1);
          yi = S_MUL_ADD(im,t0,re,t1);
-
+         /* For QEXT, it's best to scale before the FFT, but otherwise it's best to scale after.
+            For floating-point it doesn't matter. */
+#ifdef ENABLE_QEXT
          yc.r = yr;
          yc.i = yi;
-         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
-         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+#else
+         yc.r = S_MUL2(yr, scale);
+         yc.i = S_MUL2(yi, scale);
+#endif
+#ifdef FIXED_POINT
+         maxval = MAX32(maxval, MAX32(ABS32(yc.r), ABS32(yc.i)));
+#endif
          f2[st->bitrev[i]] = yc;
       }
+#ifdef FIXED_POINT
+      headroom = IMAX(0, IMIN(scale_shift, 28-celt_ilog2(maxval)));
+#endif
    }
 
    /* N/4 complex FFT, does not downscale anymore */
-   opus_fft_impl(st, f2);
+   opus_fft_impl(st, f2 ARG_FIXED(scale_shift-headroom));
 
    /* Post-rotate */
    {
@@ -170,8 +193,16 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
       for(i=0;i<N4;i++)
       {
          kiss_fft_scalar yr, yi;
-         yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
-         yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
+         kiss_fft_scalar t0, t1;
+#ifdef ENABLE_QEXT
+         t0 = S_MUL2(t[i], scale);
+         t1 = S_MUL2(t[N4+i], scale);
+#else
+         t0 = t[i];
+         t1 = t[N4+i];
+#endif
+         yr = S_MUL_SUB_PSR(fp->i,t1 , fp->r,t0, headroom);
+         yi = S_MUL_ADD_PSR(fp->r,t1 , fp->i,t0, headroom);
          *yp1 = yr;
          *yp2 = yi;
          fp++;
@@ -184,13 +215,15 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
 
 #define OVERRIDE_clt_mdct_backward
 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
-      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
+      const celt_coef * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
 {
    int i;
    int N, N2, N4;
    const kiss_twiddle_scalar *trig;
-
-    (void)arch;
+#ifdef FIXED_POINT
+   int pre_shift, post_shift, fft_shift;
+#endif
+   (void) arch;
 
    N = l->n;
    trig = l->trig;
@@ -202,6 +235,21 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
    N2 = N>>1;
    N4 = N>>2;
 
+#ifdef FIXED_POINT
+   {
+      opus_val32 sumval=N2;
+      opus_val32 maxval=0;
+      for (i=0;i<N2;i++) {
+         maxval = MAX32(maxval, ABS32(in[i*stride]));
+         sumval = ADD32_ovflw(sumval, ABS32(SHR32(in[i*stride],11)));
+      }
+      pre_shift = IMAX(0, 29-celt_zlog2(1+maxval));
+      /* Worst-case where all the energy goes to a single sample. */
+      post_shift = IMAX(0, 19-celt_ilog2(ABS32(sumval)));
+      post_shift = IMIN(post_shift, pre_shift);
+      fft_shift = pre_shift - post_shift;
+   }
+#endif
    /* Pre-rotate */
    {
       /* Temp pointers to make it really clear to the compiler what we're doing */
@@ -214,9 +262,12 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
       {
          int rev;
          kiss_fft_scalar yr, yi;
+         opus_val32 x1, x2;
          rev = *bitrev++;
-         yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
-         yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
+         x1 = SHL32_ovflw(*xp1, pre_shift);
+         x2 = SHL32_ovflw(*xp2, pre_shift);
+         yr = S_MUL_ADD(x2,t[i] , x1,t[N4+i]);
+         yi = S_MUL_SUB(x1,t[i] , x2,t[N4+i]);
          /* We swap real and imag because we use an FFT instead of an IFFT. */
          yp[2*rev+1] = yr;
          yp[2*rev] = yi;
@@ -226,13 +277,13 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
       }
    }
 
-   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)) ARG_FIXED(fft_shift));
 
    /* Post-rotate and de-shuffle from both ends of the buffer at once to make
       it in-place. */
    {
-      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
-      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
       const kiss_twiddle_scalar *t = &trig[0];
       /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
          middle pair will be computed twice. */
@@ -246,8 +297,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
          t0 = t[i];
          t1 = t[N4+i];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL_ADD(re,t0 , im,t1);
-         yi = S_MUL_SUB(re,t1 , im,t0);
+         yr = S_MUL_ADD_PSR(re,t0 , im,t1, post_shift);
+         yi = S_MUL_SUB_PSR(re,t1 , im,t0, post_shift);
          /* We swap real and imag because we're using an FFT instead of an IFFT. */
          re = yp1[1];
          im = yp1[0];
@@ -257,8 +308,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
          t0 = t[(N4-i-1)];
          t1 = t[(N2-i-1)];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL_ADD(re,t0,im,t1);
-         yi = S_MUL_SUB(re,t1,im,t0);
+         yr = S_MUL_ADD_PSR(re,t0,im,t1, post_shift);
+         yi = S_MUL_SUB_PSR(re,t1,im,t0, post_shift);
          yp1[0] = yr;
          yp0[1] = yi;
          yp0 += 2;
@@ -270,16 +321,16 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
    {
       kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
       kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
-      const opus_val16 * OPUS_RESTRICT wp1 = window;
-      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+      const celt_coef * OPUS_RESTRICT wp1 = window;
+      const celt_coef * OPUS_RESTRICT wp2 = window+overlap-1;
 
       for(i = 0; i < overlap/2; i++)
       {
          kiss_fft_scalar x1, x2;
          x1 = *xp1;
          x2 = *yp1;
-         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
-         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         *yp1++ = S_MUL_SUB(x2, *wp2, x1, *wp1);
+         *xp1-- = S_MUL_ADD(x2, *wp1, x1, *wp2);
          wp1++;
          wp2--;
       }
diff --git a/celt/mips/pitch_mipsr1.h b/celt/mips/pitch_mipsr1.h
index a9500aff5..6cbdd78d3 100644
--- a/celt/mips/pitch_mipsr1.h
+++ b/celt/mips/pitch_mipsr1.h
@@ -39,26 +39,22 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c
       int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
 {
    int j;
-   opus_val32 xy01=0;
-   opus_val32 xy02=0;
+   long long acc1 = 0; /* running sum of x[j]*y01[j]; zero-init replaces the asm that cleared $ac1 */
+   long long acc2 = 0; /* running sum of x[j]*y02[j]; zero-init replaces the asm that cleared $ac2 */
 
    (void)arch;
 
-   asm volatile("MULT $ac1, $0, $0");
-   asm volatile("MULT $ac2, $0, $0");
    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
-   for (j=0;j<N;j++)
+   for (j=0;j<N;j+=2) /* two samples per pass; NOTE(review): index j+1 is read below, so N is assumed even - confirm callers */
    {
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
-      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
-      ++j;
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
-      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+       acc1 = __builtin_mips_madd(acc1, (int)x[j],   (int)y01[j]); /* acc1 += x[j]*y01[j] */
+       acc2 = __builtin_mips_madd(acc2, (int)x[j],   (int)y02[j]); /* acc2 += x[j]*y02[j] */
+       acc1 = __builtin_mips_madd(acc1, (int)x[j+1], (int)y01[j+1]); /* same for the second sample of the pair */
+       acc2 = __builtin_mips_madd(acc2, (int)x[j+1], (int)y02[j+1]);
    }
-   asm volatile ("mflo %0, $ac1": "=r"(xy01));
-   asm volatile ("mflo %0, $ac2": "=r"(xy02));
-   *xy1 = xy01;
-   *xy2 = xy02;
+
+   *xy1 = (opus_val32)acc1; /* narrow the 64-bit sum to opus_val32, standing in for the former mflo read of $ac1 */
+   *xy2 = (opus_val32)acc2; /* likewise for the second sum (former mflo read of $ac2) */
 }
 
 static inline void xcorr_kernel_mips(const opus_val16 * x,
diff --git a/celt/mips/vq_mipsr1.h b/celt/mips/vq_mipsr1.h
index 1621c5624..009c3ef3e 100644
--- a/celt/mips/vq_mipsr1.h
+++ b/celt/mips/vq_mipsr1.h
@@ -64,13 +64,14 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
 }
 
 #define OVERRIDE_renormalise_vector
-void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
+void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
 {
    int i;
 #ifdef FIXED_POINT
    int k;
 #endif
-   opus_val32 E = EPSILON;
+   long long acc = EPSILON;
+   opus_val32 E;
    opus_val16 g;
    opus_val32 t;
    celt_norm *xptr = X;
@@ -78,31 +79,28 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
 
    (void)arch;
 
-   asm volatile("mult $ac1, $0, $0");
-   asm volatile("MTLO %0, $ac1" : :"r" (E));
    /*if(N %4)
        printf("error");*/
    for (i=0;i<N-2;i+=2)
    {
       X0 = (int)*xptr++;
-      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
-
       X1 = (int)*xptr++;
-      asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
+      acc = __builtin_mips_madd(acc, X0, X0);
+      acc = __builtin_mips_madd(acc, X1, X1);
    }
 
    for (;i<N;i++)
    {
       X0 = (int)*xptr++;
-      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+      acc = __builtin_mips_madd(acc, X0, X0);
    }
 
-   asm volatile("MFLO %0, $ac1" : "=r" (E));
+   E = (opus_val32)acc;
 #ifdef FIXED_POINT
    k = celt_ilog2(E)>>1;
 #endif
    t = VSHR32(E, 2*(k-7));
-   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
 
    xptr = X;
    for (i=0;i<N;i++)
diff --git a/celt/pitch.h b/celt/pitch.h
index dd0e2bebd..25c0ad379 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -42,7 +42,7 @@
 #include "x86/pitch_sse.h"
 #endif
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/pitch_mipsr1.h"
 #endif
 
diff --git a/celt/vq.c b/celt/vq.c
index df8754d9d..e49054303 100644
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -39,7 +39,7 @@
 #include "rate.h"
 #include "pitch.h"
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/vq_mipsr1.h"
 #endif
 
diff --git a/silk/NSQ_del_dec.c b/silk/NSQ_del_dec.c
index e8dadf159..1ec177446 100644
--- a/silk/NSQ_del_dec.c
+++ b/silk/NSQ_del_dec.c
@@ -61,7 +61,7 @@ typedef struct {
 
 typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/NSQ_del_dec_mipsr1.h"
 #endif
 static OPUS_INLINE void silk_nsq_del_dec_scale_states(
diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index 2ac0d3451..49a70a8e9 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -631,7 +631,7 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 #include "arm/SigProc_FIX_armv5e.h"
 #endif
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/sigproc_fix_mipsr1.h"
 #endif
 
diff --git a/silk/fixed/noise_shape_analysis_FIX.c b/silk/fixed/noise_shape_analysis_FIX.c
index 85fea0bf0..a8504e263 100644
--- a/silk/fixed/noise_shape_analysis_FIX.c
+++ b/silk/fixed/noise_shape_analysis_FIX.c
@@ -128,8 +128,8 @@ static OPUS_INLINE void limit_warped_coefs(
     silk_assert( 0 );
 }
 
-/* Disable MIPS version until it's updated. */
-#if 0 && defined(MIPSr1_ASM)
+/* Disable MIPS DSP version until it's updated. */
+#if 0 && defined(__mips_dsp)
 #include "mips/noise_shape_analysis_FIX_mipsr1.h"
 #endif
 
diff --git a/silk/fixed/warped_autocorrelation_FIX.c b/silk/fixed/warped_autocorrelation_FIX.c
index 5c79553bc..8caf0afb1 100644
--- a/silk/fixed/warped_autocorrelation_FIX.c
+++ b/silk/fixed/warped_autocorrelation_FIX.c
@@ -31,7 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "main_FIX.h"
 
-#if defined(MIPSr1_ASM)
+#if defined(__mips_dsp)
 #include "mips/warped_autocorrelation_FIX_mipsr1.h"
 #endif
 
diff --git a/silk/macros.h b/silk/macros.h
index 667b48d3a..7d3f3a28a 100644
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -104,7 +104,7 @@ POSSIBILITY OF SUCH DAMAGE.
                                         (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
                                         ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
 
-#if defined(MIPSr1_ASM)
+#if defined(FIXED_POINT) && defined(__mips_dsp)
 #include "mips/macros_mipsr1.h"
 #endif
 
diff --git a/silk/mips/macros_mipsr1.h b/silk/mips/macros_mipsr1.h
index af408802c..0393a33fa 100644
--- a/silk/mips/macros_mipsr1.h
+++ b/silk/mips/macros_mipsr1.h
@@ -29,7 +29,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_MACROS_MIPSR1_H__
 #define SILK_MACROS_MIPSR1_H__
 
-#define mips_clz(x) __builtin_clz(x)
+static inline int mips_clz(opus_uint32 x) /* Count the leading zero bits of x. */
+{
+    return x ? __builtin_clz(x) : 32; /* __builtin_clz(0) is undefined, so zero is mapped to 32 explicitly */
+}
 
 #undef silk_SMULWB
 static inline int silk_SMULWB(int a, int b)
@@ -75,9 +78,9 @@ static inline int silk_SMLAWW(int a, int b, int c)
 static inline opus_int32 silk_CLZ16(opus_int16 in16)
 {
     int re32;
-    opus_int32 in32 = (opus_int32 )in16;
+    opus_uint32 in32 = (opus_uint16)in16; /* go through opus_uint16 so a negative in16 is zero-extended, not sign-extended */
     re32 = mips_clz(in32);
-    re32-=16;
+    re32 -= 16; /* drop the 16 leading zeros that come from widening the 16-bit value to 32 bits */
     return re32;
 }