From 902b2a0ef384b6f21ece46a3e9c3ac67386f3230 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Thu, 4 Apr 2019 15:49:58 -0700
Subject: [PATCH 01/16] Initial commit of intrinsics and test

---
 system/include/simd128.h          |  646 ++++++++++++++
 tests/test_core.py                |    8 +
 tests/test_wasm_intrinsics_simd.c | 1345 +++++++++++++++++++++++++++++
 3 files changed, 1999 insertions(+)
 create mode 100644 system/include/simd128.h
 create mode 100644 tests/test_wasm_intrinsics_simd.c

diff --git a/system/include/simd128.h b/system/include/simd128.h
new file mode 100644
index 0000000000000..0533644300fe7
--- /dev/null
+++ b/system/include/simd128.h
@@ -0,0 +1,646 @@
+/*
+Proposed WebAssembly SIMD Instructions 1 April 2019
+*/
+#include <stdint.h> //rrw where to get uintx_t's from?
+typedef int8_t v128 __attribute__((__vector_size__(16)));
+typedef int8_t i8x16 __attribute__((__vector_size__(16)));
+typedef uint8_t u8x16 __attribute__((__vector_size__(16)));
+typedef int16_t i16x8 __attribute__((__vector_size__(16)));
+typedef uint16_t u16x8 __attribute__((__vector_size__(16)));
+typedef int32_t i32x4 __attribute__((__vector_size__(16)));
+typedef uint32_t u32x4 __attribute__((__vector_size__(16)));
+typedef int64_t i64x2 __attribute__((__vector_size__(16)));
+typedef uint64_t u64x2 __attribute__((__vector_size__(16)));
+typedef float f32x4 __attribute__((__vector_size__(16)));
+typedef double f64x2 __attribute__((__vector_size__(16)));
+
+#define __DEFAULT_FN_VIS_ATTRS \
+  __attribute__((used)) __attribute__((visibility("default")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+// v128 wasm_v128_const(...)
+static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c15,
+    int8_t c14, int8_t c13, int8_t c12, int8_t c11, int8_t c10,
+    int8_t c9, int8_t c8, int8_t c7, int8_t c6, int8_t c5,
+    int8_t c4, int8_t c3, int8_t c2, int8_t c1, int8_t c0) {
+  return __extension__(i8x16){c0, c1, c2, c3, c4, c5, c6, c7,
+                              c8, c9, c10, c11, c12, c13, c14, c15};
+}
+
+// v128 wasm_v128_load(v128* mem)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_v128_load(i8x16* mem) {
+  return __extension__(i8x16)(*mem);
+}
+
+// wasm_v128_store(v128 *mem, v128 a)
+static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(v128* mem, v128 a) {
+  *(i8x16*)mem = a;
+  return;
+}
+
+// i8x16 wasm_i8x16_splat(int8_t a)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) {
+  return __extension__(i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a};
+}
+
+// i16x8 wasm_i16x8_splat(int16_t a)
+static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) {
+  return __extension__(i16x8){a, a, a, a, a, a, a, a};
+}
+
+// i32x4 wasm_i32x4_splat(int32_t a)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) {
+  return __extension__(i32x4){a, a, a, a};
+}
+
+// i64x2 wasm_i64x2_splat(int64_t a)
+static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) {
+  return __extension__(i64x2){a, a};
+}
+
+// f32x4 wasm_f32x4_splat(float a)
+static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) {
+  return __extension__(f32x4){a, a, a, a};
+}
+
+// f64x2 wasm_f64x2_splat(double a)
+static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) {
+  return __extension__(f64x2){a, a};
+}
+
+// int8_t wasm_i8x16_extract_lane(i8x16 a, imm)
+#define wasm_i8x16_extract_lane(a, b) \
+  (__builtin_wasm_extract_lane_s_i8x16(a, b))
+
+// int8_t wasm_u8x16_extract_lane(u8x16 a, imm)
+#define wasm_u8x16_extract_lane(a, b) \
+  (__builtin_wasm_extract_lane_u_i8x16(a, 
b)) + +// int16_t wasm_i16x8_extract_lane(i16x8 a, imm) +#define wasm_i16x8_extract_lane(a, b) \ + (__builtin_wasm_extract_lane_s_i16x8(a, b)) + +#ifdef __wasm_unimplemented_simd128__ +// int16_t wasm_u16x8_extract_lane(u16x8 a, imm) +#define wasm_u16x8_extract_lane(a, b) \ + (__builtin_wasm_extract_lane_u_i16x8(a, b)) +#endif + +// int32_t wasm_i32x4_extract_lane(i32x4 a, imm) +#define wasm_i32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_i32x4(a, b)) + +// int64_t wasm_i64x2_extract_lane(i8x16, imm) +#define wasm_i64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_i64x2(a, b)) + +// float wasm_f32x4_extract_lane(f32x4, imm) +#define wasm_f32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_f32x4(a, b)) + +// double __builtin_wasm_extract_lane_f64x2(f64x2, imm) +#ifdef __wasm_undefined_simd128__ +#define wasm_f64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_f64x2(a, b)) +#endif + +// i8x16 wasm_i8x16_replace_lane(i8x16 a, imm i, int8_t b) +#define wasm_i8x16_replace_lane(a, i, b) \ + (__builtin_wasm_replace_lane_i8x16(a, i, b)) + +// i16x8 wasm_i16x8_replace_lane(i16x8 a, imm i, int16_t b) +#define wasm_i16x8_replace_lane(a, i, b) \ + (__builtin_wasm_replace_lane_i16x8(a, i, b)) + +// i32x4 wasm_i32x4_replace_lane(i32x4 a, imm i, int32_t b) +#define wasm_i32x4_replace_lane(a, i, b) \ + (__builtin_wasm_replace_lane_i32x4(a, i, b)) + +// i8x16 wasm_i64x2_replace_lane(i8x16 a, imm i, int64_t b) +#define wasm_i64x2_replace_lane(a, i, b) \ + (__builtin_wasm_replace_lane_i64x2(a, i, b)) + +// f32x4 wasm_f32x4_replace_lane(f32x4 a, imm i, float b) +#define wasm_f32x4_replace_lane(a, i, b) \ + (__builtin_wasm_replace_lane_f32x4(a, i, b)) + +#ifdef __wasm_unimplemented_simd128__ +// f64x2 wasm_f64x4_replace_lane(f64x2 a, imm i, double b) +#define wasm_f64x2_replace_lane(a, i, b) \ + (__builtin_wasm_replace_lane_f64x2(a, i, b)) +#endif + +// i8x16 wasm_i8x16_add(i8x16 a i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_add(i8x16 a, i8x16 b) { + return __extension__(i8x16){a + b}; +} + +// i16x8 wasm_i16x8_add(i16x8 a i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_add(i16x8 a, i16x8 b) { + return __extension__(i16x8){a + b}; +} + +// i32x4 wasm_i32x4_add(i32x4 a i32x4 b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_add(i32x4 a, i32x4 b) { + return __extension__(i32x4){a + b}; +} + +// i8x16 wasm_i64x2_add(i8x16 a i8x16 b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_add(i64x2 a, i64x2 b) { + return __extension__(i64x2){a + b}; +} + +// f32x4 wasm_f32x4_add(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_add(f32x4 a, f32x4 b) { + return __extension__(f32x4){a + b}; +} + +// i8x16 wasm_i8x16_sub(i8x16 a, i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_sub(i8x16 a, i8x16 b) { + return __extension__(i8x16){a - b}; +} + +// i16x8 wasm_i16x8_sub(i16x8 a i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_sub(i16x8 a, i16x8 b) { + return __extension__(i16x8){a - b}; +} + +// i32x4 wasm_i32x4_sub(i32x4 a i32x4 b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_sub(i32x4 a, i32x4 b) { + return __extension__(i32x4){a - b}; +} + +// i64x2 wasm_i64x2_sub(i64x2 a i64x2 b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_sub(i64x2 a, i64x2 b) { + return __extension__(i64x2){a - b}; +} + +// f32x4 wasm_f32x4_sub(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_sub(f32x4 a, f32x4 b) { + return __extension__(f32x4){a - b}; +} + +// i8x16 wasm_i8x16_mul(i8x16 a i8x16 
b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_mul(i8x16 a, i8x16 b) { + return __extension__(i8x16){a * b}; +} + +// i16x8 wasm_i16x8_mul(i16x8 a i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_mul(i16x8 a, i16x8 b) { + return __extension__(i16x8){a * b}; +} + +// i32x4 wasm_i32x4_mul(i32x4 a i32x4 b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_mul(i32x4 a, i32x4 b) { + return __extension__(i32x4){a * b}; +} + +// i64x2 wasm_i64x2_mul(i64x2 a i64x2 b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_mul(i64x2 a, i64x2 b) { + return __extension__(i64x2){a * b}; +} + +// f32x4 wasm_f32x4_mul(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_mul(f32x4 a, f32x4 b) { + return __extension__(f32x4){a * b}; +} + +// i8x16 wasm_i8x16_neg(i8x16 a) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_neg(i8x16 a) { + return __extension__(i8x16){-a}; +} + +// i16x8 wasm_i16x8_neg(i16x8 a) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_neg(i16x8 a) { + return __extension__(i16x8){-a}; +} + +// i32x4 wasm_i32x4_neg(i32x4 a) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_neg(i32x4 a) { + return __extension__(i32x4){-a}; +} + +// i64x2 wasm_i64x2_neg(i64x2 a) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_neg(i64x2 a) { + return __extension__(i64x2){-a}; +} + +// f32x4 wasm_f32x4_neg(f32x4 a) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_neg(f32x4 a) { + return __extension__(f32x4){-a}; +} + +// f64x2 wasm_f64x2_neg(f64x2 a) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_neg(f64x2 a) { + return __extension__(f64x2){-a}; +} + +// i8x16 wasm_add_saturate(i8x16 a, i8x16 b) +#define wasm_i8x16_add_saturate(a, b) \ + (__builtin_wasm_add_saturate_s_i8x16(a, b)) + +// u8x16 wasm_add_saturate(u8x16 a, u8x16 b) +#define wasm_u8x16_add_saturate(a, b) \ + (__builtin_wasm_add_saturate_u_i8x16(a, b)) + +// i16x8 wasm_add_saturate(i16x8 a, i16x8 b) +#define wasm_i16x8_add_saturate(a, b) \ + (__builtin_wasm_add_saturate_s_i16x8(a, b)) + +// u16x8 wasm_add_saturate(u16x8 a, u16x8 b) +#define wasm_u16x8_add_saturate(a, b) \ + (__builtin_wasm_add_saturate_u_i16x8(a, b)) + +// i8x16 wasm_sub_saturate(i8x16 a, i8x16 b) +#define wasm_i8x16_sub_saturate(a, b) \ + (__builtin_wasm_sub_saturate_s_i8x16(a, b)) + +// u8x16 wasm_sub_saturate(u8x16 a, u8x16 b) +#define wasm_u8x16_sub_saturate(a, b) \ + (__builtin_wasm_sub_saturate_u_i8x16(a, b)) + +// i16x8 wasm_sub_saturate(i16x8 a, i16x8 b) +#define wasm_i16x8_sub_saturate(a, b) \ + (__builtin_wasm_sub_saturate_s_i16x8(a, b)) + +// u16x8 wasm_sub_saturate(u16x8 a, u16x8 b) +#define wasm_u16x8_sub_saturate(a, b) \ + (__builtin_wasm_sub_saturate_u_i16x8(a, b)) + +// i8x16 wasm_i8x16_shl(i8x16 a, int32_t b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shl(i8x16 a, int32_t b) { + return __extension__(i8x16){a << b}; +} + +// i16x8 wasm_i16x8_shl(i16x8 a, int32_t b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shl(i16x8 a, int32_t b) { + return __extension__(i16x8){a << b}; +} + +// i32x4 wasm_i32x4_shl(i32x4 a, int32_t b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shl(i32x4 a, int32_t b) { + return __extension__(i32x4){a << b}; +} + +// i64x2 wasm_i64x2_shl(i64x2 a, int32_t b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shl(i64x2 a, int32_t b) { + return __extension__(i64x2){a << b}; +} + +// i8x16 wasm_i8x64_shr(i8x16 a, int32_t b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shr(i8x16 a, int32_t b) { + return 
__extension__(i8x16){a >> b};
+}
+
+// u8x16 wasm_u8x16_shr(u8x16 a, int32_t b)
+static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_shr(u8x16 a, int32_t b) {
+  return __extension__(u8x16){a >> b};
+}
+
+// i16x8 wasm_i16x8_shr(i16x8 a, int32_t b)
+static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shr(i16x8 a, int32_t b) {
+  return __extension__(i16x8){a >> b};
+}
+
+// u16x8 wasm_u16x8_shr(u16x8 a, int32_t b)
+static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_shr(u16x8 a, int32_t b) {
+  return __extension__(u16x8){a >> b};
+}
+
+// i32x4 wasm_i32x4_shr(i32x4 a, int32_t b)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shr(i32x4 a, int32_t b) {
+  return __extension__(i32x4){a >> b};
+}
+
+// u32x4 wasm_u32x4_shr(u32x4 a, int32_t b)
+static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_shr(u32x4 a, int32_t b) {
+  return __extension__(u32x4){a >> b};
+}
+
+// i64x2 wasm_i64x2_shr(i64x2 a, int32_t b)
+static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shr(i64x2 a, int32_t b) {
+  return __extension__(i64x2){a >> b};
+}
+
+// u64x2 wasm_u64x2_shr(u64x2 a, int32_t b)
+static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_u64x2_shr(u64x2 a, int32_t b) {
+  return __extension__(u64x2){a >> b};
+}
+
+// i8x16 wasm_i8x16_and(i8x16 a, i8x16 b)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_and(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a & b};
+}
+
+// i8x16 wasm_i8x16_or(i8x16 a, i8x16 b)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_or(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a | b};
+}
+
+// i8x16 wasm_i8x16_xor(i8x16 a, i8x16 b)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_xor(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a ^ b};
+}
+
+// i8x16 wasm_i8x16_not(i8x16 a)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_not(i8x16 a) {
+  return __extension__(i8x16){~a};
+}
+
+// i8x16 wasm_i8x16_bitselect(i8x16 a, i8x16 b, i8x16 c)
+#define wasm_i8x16_bitselect(a, b, c) (__builtin_wasm_bitselect(a, b, c))
+
+// bool wasm_i8x16_any_true(i8x16 a)
+#define wasm_i8x16_any_true(a) (__builtin_wasm_any_true_i8x16(a))
+
+// bool wasm_i16x8_any_true(i16x8 a)
+#define wasm_i16x8_any_true(a) (__builtin_wasm_any_true_i16x8(a))
+
+// bool wasm_i32x4_any_true(i32x4 a)
+#define wasm_i32x4_any_true(a) (__builtin_wasm_any_true_i32x4(a))
+
+#ifdef __wasm_unimplemented_simd128__
+// bool wasm_i64x2_any_true(i64x2 a)
+#define wasm_i64x2_any_true(a) (__builtin_wasm_any_true_i64x2(a))
+#endif
+
+// bool wasm_i8x16_all_true(i8x16 a)
+#define wasm_i8x16_all_true(a) (__builtin_wasm_all_true_i8x16(a))
+
+// bool wasm_i16x8_all_true(i16x8 a)
+#define wasm_i16x8_all_true(a) (__builtin_wasm_all_true_i16x8(a))
+
+// bool wasm_i32x4_all_true(i32x4 a)
+#define wasm_i32x4_all_true(a) (__builtin_wasm_all_true_i32x4(a))
+
+// bool wasm_i64x2_all_true(i64x2 a)
+#define wasm_i64x2_all_true(a) (__builtin_wasm_all_true_i64x2(a))
+
+// i8x16 wasm_i8x16_eq(i8x16 a, i8x16 b)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_eq(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a == b};
+}
+
+// i16x8 wasm_i16x8_eq(i16x8 a, i16x8 b)
+static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_eq(i16x8 a, i16x8 b) {
+  return __extension__(i16x8){a == b};
+}
+
+// i32x4 wasm_i32x4_eq(i32x4 a, i32x4 b)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_eq(i32x4 a, i32x4 b) {
+  return __extension__(i32x4){a == b};
+}
+
+#ifdef __wasm_unimplemented_simd128__
+// i32x4 wasm_f32x4_eq(f32x4 a, f32x4 b)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_f32x4_eq(f32x4 a, f32x4 b) {
+  return __extension__(i32x4){a == b};
+}
+#endif
+
+#ifdef __wasm_unimplemented_simd128__
+// i64x2 wasm_f64x2_eq(f64x2 a, f64x2 b)
+static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_f64x2_eq(f64x2 a, f64x2 b) {
+  return __extension__(i64x2){a == b};
+}
+#endif
+
+// i8x16 wasm_i8x16_ne(i8x16 a, i8x16 b)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_ne(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a != b};
+}
+
+// i16x8 wasm_i16x8_ne(i16x8 a, i16x8 b)
+static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_ne(i16x8 a, i16x8 b) {
+  return __extension__(i16x8){a != b};
+}
+
+// i32x4 wasm_i32x4_ne(i32x4 a, i32x4 b)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_ne(i32x4 a, i32x4 b) {
+  return __extension__(i32x4){a != b};
+}
+
+// i32x4 wasm_f32x4_ne(f32x4 a, f32x4 b)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_f32x4_ne(f32x4 a, f32x4 b) {
+  return __extension__(i32x4){a != b};
+}
+
+// i64x2 wasm_f64x2_ne(f64x2 a, f64x2 b)
+static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ne(f64x2 a, f64x2 b) {
+  return __extension__(i64x2){a != b};
+}
+
+// i8x16 wasm_i8x16_lt(i8x16 a, i8x16 b)
+static __inline__ i8x16 wasm_i8x16_lt(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a < b};
+}
+
+// i8x16 wasm_u8x16_lt(u8x16 a, u8x16 b)
+static __inline__ i8x16 wasm_u8x16_lt(u8x16 a, u8x16 b) {
+  return __extension__(u8x16){a < b};
+}
+
+// i16x8 wasm_i16x8_lt(i16x8 a, i16x8 b)
+static __inline__ i16x8 wasm_i16x8_lt(i16x8 a, i16x8 b) {
+  return __extension__(i16x8){a < b};
+}
+
+// i16x8 wasm_u16x8_lt(u16x8 a, u16x8 b)
+static __inline__ i16x8 wasm_u16x8_lt(u16x8 a, u16x8 b) {
+  return __extension__(u16x8){a < b};
+}
+
+// i32x4 wasm_i32x4_lt(i32x4 a, i32x4 b)
+static __inline__ i32x4 wasm_i32x4_lt(i32x4 a, i32x4 b) {
+  return __extension__(i32x4){a < b};
+}
+
+// i32x4 wasm_u32x4_lt(u32x4 a, u32x4 b)
+static __inline__ i32x4 wasm_u32x4_lt(u32x4 a, u32x4 b) {
+  return __extension__(u32x4){a < b};
+}
+
+// i32x4 wasm_f32x4_lt(f32x4 a, f32x4 b)
+static __inline__ i32x4 wasm_f32x4_lt(f32x4 a, f32x4 b) {
+  return __extension__(i32x4){a < b};
+}
+
+// i64x2 wasm_f64x2_lt(f64x2 a, f64x2 b)
+static __inline__ i64x2 wasm_f64x2_lt(f64x2 a, f64x2 b) {
+  return __extension__(i64x2){a < b};
+}
+
+// i8x16 wasm_i8x16_le(i8x16 a, i8x16 b)
+static __inline__ i8x16 wasm_i8x16_le(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a <= b};
+}
+
+// i8x16 wasm_u8x16_le(u8x16 a, u8x16 b)
+static __inline__ i8x16 wasm_u8x16_le(u8x16 a, u8x16 b) {
+  return __extension__(u8x16){a <= b};
+}
+
+// i16x8 wasm_i16x8_le(i16x8 a, i16x8 b)
+static __inline__ i16x8 wasm_i16x8_le(i16x8 a, i16x8 b) {
+  return __extension__(i16x8){a <= b};
+}
+
+// i16x8 wasm_u16x8_le(u16x8 a, u16x8 b)
+static __inline__ i16x8 wasm_u16x8_le(u16x8 a, u16x8 b) {
+  return __extension__(u16x8){a <= b};
+}
+
+// i32x4 wasm_i32x4_le(i32x4 a, i32x4 b)
+static __inline__ i32x4 wasm_i32x4_le(i32x4 a, i32x4 b) {
+  return __extension__(i32x4){a <= b};
+}
+
+// i32x4 wasm_u32x4_le(u32x4 a, u32x4 b)
+static __inline__ i32x4 wasm_u32x4_le(u32x4 a, u32x4 b) {
+  return __extension__(u32x4){a <= b};
+}
+
+// i32x4 wasm_f32x4_le(f32x4 a, f32x4 b)
+static __inline__ i32x4 wasm_f32x4_le(f32x4 a, f32x4 b) {
+  return __extension__(i32x4){a <= b};
+}
+
+// i64x2 wasm_f64x2_le(f64x2 a, f64x2 b)
+static __inline__ i64x2 wasm_f64x2_le(f64x2 a, f64x2 b) {
+  return __extension__(i64x2){a <= b};
+}
+
+// i8x16 wasm_i8x16_gt(i8x16 a, i8x16 b)
+static __inline__ i8x16 wasm_i8x16_gt(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a > b};
+}
+
+// i8x16 wasm_u8x16_gt(u8x16 a, u8x16 b)
+static __inline__ i8x16 wasm_u8x16_gt(u8x16 a, u8x16 b) {
+  return __extension__(u8x16){a > b};
+}
+
+// i16x8 wasm_i16x8_gt(i16x8 a, i16x8 b)
+static __inline__ i16x8 wasm_i16x8_gt(i16x8 a, i16x8 b) {
+  return __extension__(i16x8){a > b};
+}
+
+// i16x8 wasm_u16x8_gt(u16x8 a, u16x8 b)
+static __inline__ i16x8 wasm_u16x8_gt(u16x8 a, u16x8 b) {
+  return __extension__(u16x8){a > b};
+}
+
+// i32x4 wasm_i32x4_gt(i32x4 a, i32x4 b)
+static __inline__ i32x4 wasm_i32x4_gt(i32x4 a, i32x4 b) {
+  return __extension__(i32x4){a > b};
+}
+
+// i32x4 wasm_u32x4_gt(u32x4 a, u32x4 b)
+static __inline__ i32x4 wasm_u32x4_gt(u32x4 a, u32x4 b) {
+  return __extension__(u32x4){a > b};
+}
+
+// i32x4 wasm_f32x4_gt(f32x4 a, f32x4 b)
+static __inline__ i32x4 wasm_f32x4_gt(f32x4 a, f32x4 b) {
+  return __extension__(i32x4){a > b};
+}
+
+// i64x2 wasm_f64x2_gt(f64x2 a, f64x2 b)
+static __inline__ i64x2 wasm_f64x2_gt(f64x2 a, f64x2 b) {
+  return __extension__(i64x2){a > b};
+}
+
+// i8x16 wasm_i8x16_ge(i8x16 a, i8x16 b)
+static __inline__ i8x16 wasm_i8x16_ge(i8x16 a, i8x16 b) {
+  return __extension__(i8x16){a >= b};
+}
+
+// i8x16 wasm_u8x16_ge(u8x16 a, u8x16 b)
+static __inline__ i8x16 wasm_u8x16_ge(u8x16 a, u8x16 b) {
+  return __extension__(u8x16){a >= b};
+}
+
+// i16x8 wasm_i16x8_ge(i16x8 a, i16x8 b)
+static __inline__ i16x8 wasm_i16x8_ge(i16x8 a, i16x8 b) {
+  return __extension__(i16x8){a >= b};
+}
+
+// i16x8 wasm_u16x8_ge(u16x8 a, u16x8 b)
+static __inline__ u16x8 wasm_u16x8_ge(u16x8 a, u16x8 b) {
+  return __extension__(u16x8){a >= b};
+}
+
+// i32x4 wasm_i32x4_ge(i32x4 a, i32x4 b)
+static __inline__ i32x4 wasm_i32x4_ge(i32x4 a, i32x4 b) {
+  return __extension__(i32x4){a >= b};
+}
+
+// i32x4 wasm_u32x4_ge(u32x4 a, u32x4 b)
+static __inline__ u32x4 wasm_u32x4_ge(u32x4 a, u32x4 b) {
+  return __extension__(u32x4){a >= b};
+}
+
+// i32x4 wasm_f32x4_ge(f32x4 a, f32x4 b)
+static __inline__ i32x4 wasm_f32x4_ge(f32x4 a, f32x4 b) {
+  return __extension__(i32x4){a >= b};
+}
+
+// i64x2 wasm_f64x2_ge(f64x2 a, f64x2 b)
+static __inline__ i64x2 wasm_f64x2_ge(f64x2 a, f64x2 b) {
+  return __extension__(i64x2){a >= b};
+}
+
+// i8x16 wasm_i8x16_abs(i8x16 a)
+#define wasm_i8x16_abs(a) (__builtin_wasm_abs_i8x16(a))
+
+// i16x8 wasm_i16x8_abs(i16x8 a)
+#define wasm_i16x8_abs(a) (__builtin_wasm_abs_i16x8(a))
+
+// i32x4 wasm_i32x4_abs(i32x4 a)
+#define wasm_i32x4_abs(a) (__builtin_wasm_abs_i32x4(a))
+
+// i64x2 wasm_i64x2_abs(i64x2 a)
+#define wasm_i64x2_abs(a) (__builtin_wasm_abs_i64x2(a))
+
+// f32x4 wasm_f32x4_abs(f32x4 a)
+#define wasm_f32x4_abs(a) (__builtin_wasm_abs_f32x4(a))
+
+// f64x2 wasm_f64x2_abs(f64x2 a)
+#define wasm_f64x2_abs(a) (__builtin_wasm_abs_f64x2(a))
+
+// f32x4 wasm_convert_f32x4_i32x4(i32x4 a)
+#define wasm_convert_f32x4_i32x4(v) (__builtin_convertvector(v, f32x4))
+
+// f32x4 wasm_convert_f32x4_u32x4(u32x4 a)
+#define wasm_convert_f32x4_u32x4(v) (__builtin_convertvector(v, f32x4))
+
+// f64x2 wasm_convert_f64x2_i64x2(i64x2 a)
+#define wasm_convert_f64x2_i64x2(v) (__builtin_convertvector(v, f64x2))
+
+// f64x2 wasm_convert_f64x2_u64x2(u64x2 a)
+#define wasm_convert_f64x2_u64x2(v) (__builtin_convertvector(v, f64x2))
+
+#ifdef __wasm_unimplemented_simd128__
+#endif
+
+// f32x4 wasm_f32x4_min(f32x4 a, f32x4 b)
+#define wasm_f32x4_min(a, b) (__builtin_wasm_min_f32x4(a, b))
+
+// f32x4 wasm_f32x4_max(f32x4 a, f32x4 b)
+#define wasm_f32x4_max(a, b) (__builtin_wasm_max_f32x4(a, b))
+
+// f32x4 wasm_f32x4_sqrt(f32x4 a)
+#define wasm_f32x4_sqrt(v) (__builtin_wasm_sqrt_f32x4(v))
+
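+// Illustrative usage sketch (uses only the intrinsics defined above; the
+// helper name `dot4` is invented for this example): the intrinsics compose
+// like ordinary C functions, e.g. a horizontal dot product of two f32x4
+// vectors:
+//
+//   static float dot4(f32x4 a, f32x4 b) {
+//     f32x4 prod = wasm_f32x4_mul(a, b);          // lane-wise multiply
+//     return wasm_f32x4_extract_lane(prod, 0) +   // sum the four lanes
+//            wasm_f32x4_extract_lane(prod, 1) +
+//            wasm_f32x4_extract_lane(prod, 2) +
+//            wasm_f32x4_extract_lane(prod, 3);
+//   }
+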
+#ifdef __wasm_unimplemented_simd128__
+
+// f64x2 wasm_f64x2_min(f64x2 a, f64x2 b)
+#define wasm_f64x2_min(a, b) (__builtin_wasm_min_f64x2(a, b))
+
+// f64x2 wasm_f64x2_max(f64x2 a, f64x2 b)
+#define wasm_f64x2_max(a, b) (__builtin_wasm_max_f64x2(a, b))
+
+// f64x2 wasm_f64x2_sqrt(f64x2 a)
+#define wasm_f64x2_sqrt(v) (__builtin_wasm_sqrt_f64x2(v))
+
+#endif
+
+// not sure how this should work with variable input
+// #define wasm_i8x16_shuffle(a, b) \
+//   (__builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7))
diff --git a/tests/test_core.py b/tests/test_core.py
index 99ffe96d6602b..e73daca33e233 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -5449,6 +5449,14 @@ def test_wasm_builtin_simd(self, js_engines):
     self.do_run(open(path_from_root('tests', 'test_wasm_builtin_simd.c')).read(), 'Success!',
                 js_engines=[])
 
+  @wasm_simd
+  def test_wasm_intrinsics_simd(self, js_engines):
+    self.do_run(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), 'Success!',
+                js_engines=js_engines)
+    self.emcc_args.append('-munimplemented-simd128')
+    self.do_run(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), 'Success!',
+                js_engines=[])
+
   @asm_simd
   def test_simd(self):
     self.do_run_in_out_file_test('tests', 'core', 'test_simd')
diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c
new file mode 100644
index 0000000000000..86c119f2f641c
--- /dev/null
+++ b/tests/test_wasm_intrinsics_simd.c
@@ -0,0 +1,1345 @@
+#include <emscripten.h>
+#include <math.h>
+#include <simd128.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#define TESTFN EMSCRIPTEN_KEEPALIVE __attribute__((noinline))
+
+i8x16 TESTFN i8x16_load(i8x16 *ptr) {
+  return (i8x16) wasm_v128_load(ptr);
+//  return *ptr;
+}
+void TESTFN i8x16_store(i8x16 *ptr, i8x16 vec) {
+  wasm_v128_store(ptr, vec);
+//  *ptr = vec;
+}
+i32x4 TESTFN i32x4_const(void) {
+  return (i32x4) wasm_v128_const(0,0,0,4,0,0,0,3,0,0,0,2,0,0,0,1);
+//  return (i32x4) {1, 2, 3, 4};
+}
+//todo:
+i8x16 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) {
+  return __builtin_shufflevector(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31);
+}
+//todo:
+i32x4 TESTFN i32x4_shuffle_reverse(i32x4 vec) {
+  return __builtin_shufflevector(vec, vec, 3, 2, 1, 0);
+}
+i8x16 TESTFN i8x16_splat(int32_t x) {
+  return wasm_i8x16_splat(x);
+//  return (i8x16) {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
+}
+int32_t TESTFN i8x16_extract_lane_s_first(i8x16 vec) {
+  return wasm_i8x16_extract_lane(vec, 0);
+//  return __builtin_wasm_extract_lane_s_i8x16(vec, 0);
+}
+int32_t TESTFN i8x16_extract_lane_s_last(i8x16 vec) {
+  return wasm_i8x16_extract_lane(vec, 15);
+//  return __builtin_wasm_extract_lane_s_i8x16(vec, 15);
+}
+#ifdef __wasm_unimplemented_simd128__
+uint32_t TESTFN i8x16_extract_lane_u_first(i8x16 vec) {
+  return wasm_u8x16_extract_lane(vec, 0);
+//  return __builtin_wasm_extract_lane_u_i8x16(vec, 0);
+}
+uint32_t TESTFN i8x16_extract_lane_u_last(i8x16 vec) {
+  return wasm_u8x16_extract_lane(vec, 15);
+//  return __builtin_wasm_extract_lane_u_i8x16(vec, 15);
+}
+#endif // __wasm_unimplemented_simd128__
+i8x16 TESTFN i8x16_replace_lane_first(i8x16 vec, int32_t val) {
+  return wasm_i8x16_replace_lane(vec, 0, val);
+//  return (i8x16){__builtin_wasm_replace_lane_i8x16(vec, 0, val)};
+}
+i8x16 TESTFN i8x16_replace_lane_last(i8x16 vec, int32_t val) {
+  return wasm_i8x16_replace_lane(vec, 15, val);
+//  return (i8x16){__builtin_wasm_replace_lane_i8x16(vec, 15, val)};
+}
+i16x8 TESTFN i16x8_splat(int32_t x) {
+  return wasm_i16x8_splat(x);
+//  return (i16x8) {x, x, x, x, x, 
x, x, x}; +} +int32_t TESTFN i16x8_extract_lane_s_first(i16x8 vec) { + return wasm_i16x8_extract_lane(vec, 0); +// return __builtin_wasm_extract_lane_s_i16x8(vec, 0); +} +int32_t TESTFN i16x8_extract_lane_s_last(i16x8 vec) { + return wasm_i16x8_extract_lane(vec, 7); +// return __builtin_wasm_extract_lane_s_i16x8(vec, 7); +} +#ifdef __wasm_unimplemented_simd128__ +int32_t TESTFN i16x8_extract_lane_u_first(i16x8 vec) { + return wasm_u16x8_extract_lane(vec, 0); +// return __builtin_wasm_extract_lane_u_i16x8(vec, 0); +} +int32_t TESTFN i16x8_extract_lane_u_last(i16x8 vec) { + return wasm_u16x8_extract_lane(vec, 7); +// return __builtin_wasm_extract_lane_u_i16x8(vec, 7); +} +#endif // __wasm_unimplemented_simd128__ +i16x8 TESTFN i16x8_replace_lane_first(i16x8 vec, int32_t val) { + return wasm_i16x8_replace_lane(vec, 0, val); +// return __builtin_wasm_replace_lane_i16x8(vec, 0, val); +} +i16x8 TESTFN i16x8_replace_lane_last(i16x8 vec, int32_t val) { + return wasm_i16x8_replace_lane(vec, 7, val); +// return __builtin_wasm_replace_lane_i16x8(vec, 7, val); +} +i32x4 TESTFN i32x4_splat(int32_t x) { + return wasm_i32x4_splat(x); +// return (i32x4) {x, x, x, x}; +} +int32_t TESTFN i32x4_extract_lane_first(i32x4 vec) { + return wasm_i32x4_extract_lane(vec, 0); +// return __builtin_wasm_extract_lane_i32x4(vec, 0); +} +int32_t TESTFN i32x4_extract_lane_last(i32x4 vec) { + return wasm_i32x4_extract_lane(vec, 3); +// return __builtin_wasm_extract_lane_i32x4(vec, 3); +} +i32x4 TESTFN i32x4_replace_lane_first(i32x4 vec, int32_t val) { + return wasm_i32x4_replace_lane(vec, 0, val); +// return __builtin_wasm_replace_lane_i32x4(vec, 0, val); +} +i32x4 TESTFN i32x4_replace_lane_last(i32x4 vec, int32_t val) { + return wasm_i32x4_replace_lane(vec, 3, val); +// return __builtin_wasm_replace_lane_i32x4(vec, 3, val); +} +i64x2 TESTFN i64x2_splat(int64_t x) { + return wasm_i64x2_splat(x); +// return (i64x2) {x, x}; +} +#ifdef __wasm_unimplemented_simd128__ +int64_t TESTFN i64x2_extract_lane_first(i64x2 vec) { + return wasm_i64x2_extract_lane(vec, 0); +// return __builtin_wasm_extract_lane_i64x2(vec, 0); +} +int64_t TESTFN i64x2_extract_lane_last(i64x2 vec) { + return wasm_i64x2_extract_lane(vec, 1); +// return __builtin_wasm_extract_lane_i64x2(vec, 1); +} +i64x2 TESTFN i64x2_replace_lane_first(i64x2 vec, int64_t val) { + return wasm_i64x2_replace_lane(vec, 0, val); +// return __builtin_wasm_replace_lane_i64x2(vec, 0, val); +} +i64x2 TESTFN i64x2_replace_lane_last(i64x2 vec, int64_t val) { + return wasm_i64x2_replace_lane(vec, 1, val); +// return __builtin_wasm_replace_lane_i64x2(vec, 1, val); +} +#endif // __wasm_unimplemented_simd128__ +f32x4 TESTFN f32x4_splat(float x) { + return wasm_f32x4_splat(x); +// return (f32x4) {x, x, x, x}; +} +float TESTFN f32x4_extract_lane_first(f32x4 vec) { + return wasm_f32x4_extract_lane(vec, 0); +// return __builtin_wasm_extract_lane_f32x4(vec, 0); +} +float TESTFN f32x4_extract_lane_last(f32x4 vec) { + return wasm_f32x4_extract_lane(vec, 3); +// return __builtin_wasm_extract_lane_f32x4(vec, 3); +} +f32x4 TESTFN f32x4_replace_lane_first(f32x4 vec, float val) { + return wasm_f32x4_replace_lane(vec, 0, val); +// return __builtin_wasm_replace_lane_f32x4(vec, 0, val); +} +f32x4 TESTFN f32x4_replace_lane_last(f32x4 vec, float val) { + return wasm_f32x4_replace_lane(vec, 3, val); +// return __builtin_wasm_replace_lane_f32x4(vec, 3, val); +} +f64x2 TESTFN f64x2_splat(int64_t x) { + return wasm_f64x2_splat((double ) x); +// return (f64x2) {x, x}; +} +#ifdef __wasm_unimplemented_simd128__ 
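+// Note: these f64x2 lane-access helpers, like the other blocks guarded by
+// __wasm_unimplemented_simd128__, are only exercised in the test_core.py run
+// that passes -munimplemented-simd128 (see the test_wasm_intrinsics_simd hunk
+// above).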
+double TESTFN f64x2_extract_lane_first(f64x2 vec) { + return wasm_f64x2_extract_lane(vec, 0); +// return __builtin_wasm_extract_lane_f64x2(vec, 0); +} +double TESTFN f64x2_extract_lane_last(f64x2 vec) { + return wasm_f64x2_extract_lane(vec, 1); +// return __builtin_wasm_extract_lane_f64x2(vec, 1); +} +f64x2 TESTFN f64x2_replace_lane_first(f64x2 vec, double val) { + return wasm_f64x2_replace_lane(vec, 0, val); +// return __builtin_wasm_replace_lane_f64x2(vec, 0, val); +} +f64x2 TESTFN f64x2_replace_lane_last(f64x2 vec, double val) { + return wasm_f64x2_replace_lane(vec, 1, val); +// return __builtin_wasm_replace_lane_f64x2(vec, 1, val); +} +#endif // __wasm_unimplemented_simd128__ +i8x16 TESTFN i8x16_eq(i8x16 x, i8x16 y) { + return wasm_i8x16_eq(x, y); +// return x == y; +} +i8x16 TESTFN i8x16_ne(i8x16 x, i8x16 y) { + return wasm_i8x16_ne(x, y); +// return x != y; +} +i8x16 TESTFN i8x16_lt_s(i8x16 x, i8x16 y) { + return wasm_i8x16_lt(x, y); +// return x < y; +} +i8x16 TESTFN i8x16_lt_u(u8x16 x, u8x16 y) { + return wasm_u8x16_lt(x, y); +// return (u8x16)x < (u8x16)y; +} +i8x16 TESTFN i8x16_gt_s(i8x16 x, i8x16 y) { + return wasm_i8x16_gt(x, y); +// return x > y; +} +i8x16 TESTFN u8x16_gt_u(i8x16 x, i8x16 y) { + return wasm_u8x16_gt(x,y); +// return (u8x16)x > (u8x16)y; +} +i8x16 TESTFN i8x16_le_s(i8x16 x, i8x16 y) { + return wasm_i8x16_le(x,y); +// return x <= y; +} +i8x16 TESTFN u8x16_le_u(u8x16 x, u8x16 y) { + return wasm_u8x16_le(x, y); +// return (u8x16)x <= (u8x16)y; +} +i8x16 TESTFN i8x16_ge_s(i8x16 x, i8x16 y) { + return wasm_i8x16_ge(x, y); +// return x >= y; +} +i8x16 TESTFN i8x16_ge_u(u8x16 x, u8x16 y) { + return wasm_u8x16_ge(x, y); +// return (u8x16)x >= (u8x16)y; +} +i16x8 TESTFN i16x8_eq(i16x8 x, i16x8 y) { + return wasm_i16x8_eq(x,y); +// return x == y; +} +i16x8 TESTFN i16x8_ne(i16x8 x, i16x8 y) { + return wasm_i16x8_ne(x,y); +// return x != y; +} +i16x8 TESTFN i16x8_lt_s(i16x8 x, i16x8 y) { + return wasm_i16x8_lt(x,y); +// return x < y; +} +i16x8 TESTFN i16x8_lt_u(u16x8 x, u16x8 y) { + return wasm_u16x8_lt(x,y); +// return (u16x8)x < (u16x8)y; +} +i16x8 TESTFN i16x8_gt_s(i16x8 x, i16x8 y) { + return wasm_i16x8_gt(x,y); +// return x > y; +} +i16x8 TESTFN i16x8_gt_u(u16x8 x, u16x8 y) { + return wasm_u16x8_gt(x,y); +// return (u16x8)x > (u16x8)y; +} +i16x8 TESTFN i16x8_le_s(i16x8 x, i16x8 y) { + return wasm_i16x8_le(x, y); +// return x <= y; +} +i16x8 TESTFN i16x8_le_u(u16x8 x, u16x8 y) { + return wasm_u16x8_le(x, y); +// return (u16x8)x <= (u16x8)y; +} +i16x8 TESTFN i16x8_ge_s(i16x8 x, i16x8 y) { + return wasm_i16x8_ge(x, y); +// return x >= y; +} +i16x8 TESTFN i16x8_ge_u(u16x8 x, u16x8 y) { + return wasm_u16x8_ge(x, y); +// return (u16x8)x >= (u16x8)y; +} +i32x4 TESTFN i32x4_eq(i32x4 x, i32x4 y) { + return wasm_i32x4_eq(x, y); +// return (i32x4)(x == y); +} +i32x4 TESTFN i32x4_ne(i32x4 x, i32x4 y) { + return wasm_i32x4_ne(x, y); +// return (i32x4)(x != y); +} +i32x4 TESTFN i32x4_lt_s(i32x4 x, i32x4 y) { + return wasm_i32x4_lt(x, y); +// return (i32x4)(x < y); +} +i32x4 TESTFN i32x4_lt_u(u32x4 x, u32x4 y) { + return wasm_u32x4_lt(x, y); +// return (i32x4)((u32x4)x < (u32x4)y); +} +i32x4 TESTFN i32x4_gt_s(i32x4 x, i32x4 y) { + return wasm_i32x4_gt(x, y); +// return (i32x4)(x > y); +} +i32x4 TESTFN i32x4_gt_u(u32x4 x, u32x4 y) { + return wasm_u32x4_gt(x, y); +// return (i32x4)((u32x4)x > (u32x4)y); +} +i32x4 TESTFN i32x4_le_s(i32x4 x, i32x4 y) { + return wasm_i32x4_le(x, y); +// return (i32x4)(x <= y); +} +i32x4 TESTFN i32x4_le_u(u32x4 x, u32x4 y) { + return wasm_u32x4_le(x, y); 
+// return (i32x4)((u32x4)x <= (u32x4)y); +} +i32x4 TESTFN i32x4_ge_s(i32x4 x, i32x4 y) { + return wasm_i32x4_ge(x, y); +// return (i32x4)(x >= y); +} +i32x4 TESTFN i32x4_ge_u(i32x4 x, i32x4 y) { + return wasm_u32x4_ge(x, y); +// return (i32x4)((u32x4)x >= (u32x4)y); +} +i32x4 TESTFN f32x4_eq(f32x4 x, f32x4 y) { + return wasm_f32x4_eq(x,y); +// return (f32x4)(x == y); +} +i32x4 TESTFN f32x4_ne(f32x4 x, f32x4 y) { + return wasm_f32x4_ne(x, y); +// return (i32x4)(x != y); +} +i32x4 TESTFN f32x4_lt(f32x4 x, f32x4 y) { + return wasm_f32x4_lt(x, y); +// return (i32x4)(x < y); +} +i32x4 TESTFN f32x4_gt(f32x4 x, f32x4 y) { + return wasm_f32x4_gt(x,y); +// return (i32x4)(x > y); +} +i32x4 TESTFN f32x4_le(f32x4 x, f32x4 y) { + return wasm_f32x4_le(x, y); +// return (i32x4)(x <= y); +} +i32x4 TESTFN f32x4_ge(f32x4 x, f32x4 y) { + return wasm_f32x4_ge(x, y); +// return (i32x4)(x >= y); +} +i64x2 TESTFN f64x2_eq(f64x2 x, f64x2 y) { + return wasm_f64x2_eq(x,y); +// return (i64x2)(x == y); +} +i64x2 TESTFN f64x2_ne(f64x2 x, f64x2 y) { + return wasm_f64x2_ne(x,y); +// return (i64x2)(x != y); +} +i64x2 TESTFN f64x2_lt(f64x2 x, f64x2 y) { + return wasm_f64x2_lt(x,y); +// return (i64x2)(x < y); +} +i64x2 TESTFN f64x2_gt(f64x2 x, f64x2 y) { + return wasm_f64x2_gt(x, y); +// return (i64x2)(x > y); +} +i64x2 TESTFN f64x2_le(f64x2 x, f64x2 y) { + return wasm_f64x2_le(x, y); +// return (i64x2)(x <= y); +} +i64x2 TESTFN f64x2_ge(f64x2 x, f64x2 y) { + return wasm_f64x2_ge(x, y); +// return (i64x2)(x >= y); +} +i8x16 TESTFN i8x16_not(i8x16 vec) { + return wasm_i8x16_not(vec); +// return ~vec; +} +i8x16 TESTFN i8x16_and(i8x16 x, i8x16 y) { + return wasm_i8x16_and(x, y); +// return x & y; +} +i8x16 TESTFN i8x16_or(i8x16 x, i8x16 y) { + return wasm_i8x16_or(x,y); +// return x | y; +} +i8x16 TESTFN i8x16_xor(i8x16 x, i8x16 y) { + return wasm_i8x16_xor(x,y); +// return x ^ y; +} +i8x16 TESTFN i8x16_bitselect(i8x16 x, i8x16 y, i8x16 cond) { + return wasm_i8x16_bitselect(x,y,cond); +// return (i8x16)__builtin_wasm_bitselect((i32x4)x, (i32x4)y, (i32x4)cond); +} +i8x16 TESTFN i8x16_neg(i8x16 vec) { + return wasm_i8x16_neg(vec); +// return -vec; +} +int32_t TESTFN i8x16_any_true(i8x16 vec) { + return wasm_i8x16_any_true(vec); +// return __builtin_wasm_any_true_i8x16(vec); +} +int32_t TESTFN i8x16_all_true(i8x16 vec) { + return wasm_i8x16_all_true(vec); +// return __builtin_wasm_all_true_i8x16(vec); +} +i8x16 TESTFN i8x16_shl(i8x16 vec, int32_t shift) { + return wasm_i8x16_shl(vec, shift); +// return vec << shift; +} +i8x16 TESTFN i8x16_shr_s(i8x16 vec, int32_t shift) { + return wasm_i8x16_shr(vec, shift); +// return vec >> shift; +} +u8x16 TESTFN i8x16_shr_u(u8x16 vec, int32_t shift) { + return wasm_u8x16_shr(vec, shift); +// return (i8x16)((u8x16)vec >> shift); +} +i8x16 TESTFN i8x16_add(i8x16 x, i8x16 y) { + return wasm_i8x16_add(x,y); +// return x + y; +} +i8x16 TESTFN i8x16_add_saturate_s(i8x16 x, i8x16 y) { + return wasm_i8x16_add_saturate(x, y); +// return __builtin_wasm_add_saturate_s_i8x16(x, y); +} +u8x16 TESTFN i8x16_add_saturate_u(u8x16 x, u8x16 y) { + return wasm_u8x16_add_saturate(x, y); +// return __builtin_wasm_add_saturate_u_i8x16(x, y); +} +i8x16 TESTFN i8x16_sub(i8x16 x, i8x16 y) { + return wasm_i8x16_sub(x,y); +// return x - y; +} +i8x16 TESTFN i8x16_sub_saturate_s(i8x16 x, i8x16 y) { + return wasm_i8x16_sub_saturate(x, y); +// return __builtin_wasm_sub_saturate_s_i8x16(x, y); +} +u8x16 TESTFN i8x16_sub_saturate_u(u8x16 x, u8x16 y) { + return wasm_u8x16_sub_saturate(x, y); +// return 
__builtin_wasm_sub_saturate_u_i8x16(x, y); +} +i8x16 TESTFN i8x16_mul(i8x16 x, i8x16 y) { + return wasm_i8x16_mul(x, y); +// return x * y; +} +i16x8 TESTFN i16x8_neg(i16x8 vec) { + return wasm_i16x8_neg(vec); +// return -vec; +} +bool TESTFN i16x8_any_true(i16x8 vec) { + return wasm_i16x8_any_true(vec); +// return __builtin_wasm_any_true_i16x8(vec); +} +int32_t TESTFN i16x8_all_true(i16x8 vec) { + return wasm_i16x8_all_true(vec); + //return __builtin_wasm_all_true_i16x8(vec); +} +i16x8 TESTFN i16x8_shl(i16x8 vec, int32_t shift) { + return wasm_i16x8_shl(vec, shift); +// return vec << shift; +} +i16x8 TESTFN i16x8_shr_s(i16x8 vec, int32_t shift) { + return wasm_i16x8_shr(vec, shift); +// return vec >> shift; +} +u16x8 TESTFN i16x8_shr_u(u16x8 vec, int32_t shift) { + return wasm_u16x8_shr(vec, shift); +// return (i16x8)((u16x8)vec >> shift); +} +i16x8 TESTFN i16x8_add(i16x8 x, i16x8 y) { + return wasm_i16x8_add(x, y); +// return x + y; +} +i16x8 TESTFN i16x8_add_saturate_s(i16x8 x, i16x8 y) { + return wasm_i16x8_add_saturate(x, y); +// return __builtin_wasm_add_saturate_s_i16x8(x, y); +} +u16x8 TESTFN i16x8_add_saturate_u(u16x8 x, u16x8 y) { + return wasm_u16x8_add_saturate(x, y); +// return __builtin_wasm_add_saturate_u_i16x8(x, y); +} +i16x8 TESTFN i16x8_sub(i16x8 x, i16x8 y) { + return wasm_i16x8_sub(x, y); +// return x - y; +} +i16x8 TESTFN i16x8_sub_saturate_s(i16x8 x, i16x8 y) { + return wasm_i16x8_sub_saturate(x,y); +// return __builtin_wasm_sub_saturate_s_i16x8(x, y); +} +u16x8 TESTFN i16x8_sub_saturate_u(u16x8 x, u16x8 y) { + return wasm_u16x8_sub_saturate(x, y); +// return __builtin_wasm_sub_saturate_u_i16x8(x, y); +} +i16x8 TESTFN i16x8_mul(i16x8 x, i16x8 y) { + return wasm_i16x8_mul(x, y); +// return x * y; +} +i32x4 TESTFN i32x4_neg(i32x4 vec) { + return wasm_i32x4_neg(vec); +// return -vec; +} +int32_t TESTFN i32x4_any_true(i32x4 vec) { + return wasm_i32x4_any_true(vec); +// return __builtin_wasm_any_true_i32x4(vec); +} +int32_t TESTFN i32x4_all_true(i32x4 vec) { + return wasm_i32x4_all_true(vec); +// return __builtin_wasm_all_true_i32x4(vec); +} +i32x4 TESTFN i32x4_shl(i32x4 vec, int32_t shift) { + return wasm_i32x4_shl(vec, shift); +// return vec << shift; +} +i32x4 TESTFN i32x4_shr_s(i32x4 vec, int32_t shift) { + return wasm_i32x4_shr(vec, shift); +// return vec >> shift; +} +u32x4 TESTFN i32x4_shr_u(u32x4 vec, int32_t shift) { + return wasm_u32x4_shr(vec, shift); +// return (i32x4)((u32x4)vec >> shift); +} +i32x4 TESTFN i32x4_add(i32x4 x, i32x4 y) { + return wasm_i32x4_add(x, y); +// return x + y; +} +i32x4 TESTFN i32x4_sub(i32x4 x, i32x4 y) { + return wasm_i32x4_sub(x, y); +// return x - y; +} +i32x4 TESTFN i32x4_mul(i32x4 x, i32x4 y) { + return wasm_i32x4_mul(x, y); +// return x * y; +} +i64x2 TESTFN i64x2_neg(i64x2 vec) { + return wasm_i64x2_neg(vec); +// return -vec; +} +#ifdef __wasm_unimplemented_simd128__ +bool TESTFN i64x2_any_true(i64x2 vec) { + return wasm_i64x2_any_true(vec); +// return __builtin_wasm_any_true_i64x2(vec); +} +bool TESTFN i64x2_all_true(i64x2 vec) { + return wasm_i64x2_all_true(vec); +// return __builtin_wasm_all_true_i64x2(vec); +} +#endif // __wasm_unimplemented_simd128__ +i64x2 TESTFN i64x2_shl(i64x2 vec, int32_t shift) { + return wasm_i64x2_shl(vec, shift); +// return vec << shift; +} + +i64x2 TESTFN i64x2_shr_s(i64x2 vec, int32_t shift) { + return wasm_i64x2_shr(vec, shift); +// return vec >> shift; +} +u64x2 TESTFN i64x2_shr_u(u64x2 vec, int32_t shift) { + return wasm_u64x2_shr(vec, shift); +// return (i64x2)((u64x2)vec >> shift); +} +i64x2 
TESTFN i64x2_add(i64x2 x, i64x2 y) {
+  return wasm_i64x2_add(x, y);
+//  return x + y;
+}
+i64x2 TESTFN i64x2_sub(i64x2 x, i64x2 y) {
+  return wasm_i64x2_sub(x, y);
+//  return x - y;
+}
+f32x4 TESTFN f32x4_abs(f32x4 vec) {
+  return wasm_f32x4_abs(vec);
+//  return __builtin_wasm_abs_f32x4(vec);
+}
+f32x4 TESTFN f32x4_neg(f32x4 vec) {
+  return wasm_f32x4_neg(vec);
+//  return -vec;
+}
+#ifdef __wasm_unimplemented_simd128__
+f32x4 TESTFN f32x4_sqrt(f32x4 vec) {
+  return wasm_f32x4_sqrt(vec);
+//  return __builtin_wasm_sqrt_f32x4(vec);
+}
+#endif // __wasm_unimplemented_simd128__
+f32x4 TESTFN f32x4_add(f32x4 x, f32x4 y) {
+  return wasm_f32x4_add(x,y);
+//  return x + y;
+}
+f32x4 TESTFN f32x4_sub(f32x4 x, f32x4 y) {
+  return wasm_f32x4_sub(x, y);
+//  return x - y;
+}
+f32x4 TESTFN f32x4_mul(f32x4 x, f32x4 y) {
+  return wasm_f32x4_mul(x, y);
+//  return x * y;
+}
+f32x4 TESTFN f32x4_div(f32x4 x, f32x4 y) {
+  return x / y;
+//  return wasm_f32x4_div(x, y);
+}
+f32x4 TESTFN f32x4_min(f32x4 x, f32x4 y) {
+  return wasm_f32x4_min(x, y);
+//  return __builtin_wasm_min_f32x4(x, y);
+}
+f32x4 TESTFN f32x4_max(f32x4 x, f32x4 y) {
+  return wasm_f32x4_max(x, y);
+//  return __builtin_wasm_max_f32x4(x, y);
+}
+#ifdef __wasm_unimplemented_simd128__
+f64x2 TESTFN f64x2_abs(f64x2 vec) {
+  return __builtin_wasm_abs_f64x2(vec);
+}
+#endif // __wasm_unimplemented_simd128__
+f64x2 TESTFN f64x2_neg(f64x2 vec) {
+  return -vec;
+}
+#ifdef __wasm_unimplemented_simd128__
+f64x2 TESTFN f64x2_sqrt(f64x2 vec) {
+  return __builtin_wasm_sqrt_f64x2(vec);
+}
+#endif // __wasm_unimplemented_simd128__
+f64x2 TESTFN f64x2_add(f64x2 x, f64x2 y) {
+  return x + y;
+}
+f64x2 TESTFN f64x2_sub(f64x2 x, f64x2 y) {
+  return x - y;
+}
+f64x2 TESTFN f64x2_mul(f64x2 x, f64x2 y) {
+  return x * y;
+}
+f64x2 TESTFN f64x2_div(f64x2 x, f64x2 y) {
+  return x / y;
+}
+#ifdef __wasm_unimplemented_simd128__
+f64x2 TESTFN f64x2_min(f64x2 x, f64x2 y) {
+  return __builtin_wasm_min_f64x2(x, y);
+}
+f64x2 TESTFN f64x2_max(f64x2 x, f64x2 y) {
+  return __builtin_wasm_max_f64x2(x, y);
+}
+#endif // __wasm_unimplemented_simd128__
+i32x4 TESTFN i32x4_trunc_s_f32x4_sat(f32x4 vec) {
+  return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(vec);
+}
+i32x4 TESTFN i32x4_trunc_u_f32x4_sat(f32x4 vec) {
+  return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(vec);
+}
+#ifdef __wasm_unimplemented_simd128__
+i64x2 TESTFN i64x2_trunc_s_f64x2_sat(f64x2 vec) {
+  return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(vec);
+}
+i64x2 TESTFN i64x2_trunc_u_f64x2_sat(f64x2 vec) {
+  return __builtin_wasm_trunc_saturate_u_i64x2_f64x2(vec);
+}
+#endif // __wasm_unimplemented_simd128__
+f32x4 TESTFN f32x4_convert_s_i32x4(i32x4 vec) {
+  return __builtin_convertvector(vec, f32x4);
+}
+f32x4 TESTFN f32x4_convert_u_i32x4(i32x4 vec) {
+  return __builtin_convertvector((u32x4)vec, f32x4);
+}
+f64x2 TESTFN f64x2_convert_s_i64x2(i64x2 vec) {
+  return __builtin_convertvector(vec, f64x2);
+}
+f64x2 TESTFN f64x2_convert_u_i64x2(i64x2 vec) {
+  return __builtin_convertvector((u64x2)vec, f64x2);
+}
+
+static int failures = 0;
+
+#define formatter(x) _Generic((x), \
+    char: "%d", \
+    unsigned char: "%d", \
+    short: "%d", \
+    int64_t: "%ld", \
+    int32_t: "%d", \
+    uint32_t: "%d", \
+    float: "%f", \
+    double: "%f" \
+  )
+
+#define err(x) fprintf(stderr, formatter(x), x)
+
+#define eq(a, b) ({ \
+    bool anan = _Generic((a), \
+        float: isnan(a), \
+        double: isnan(a), \
+        default: false); \
+    bool bnan = _Generic((b), \
+        float: isnan(b), \
+        double: isnan(b), \
+        default: false); \
+    ((anan && bnan) || (!anan && a == 
b)); \ + }) + +#define expect_eq(_a, _b) ({ \ + __typeof__(_a) a = (_a), b = (_b); \ + if (!eq(a, b)) { \ + failures++; \ + fprintf(stderr, "line %d: expected ", __LINE__); \ + err(b); \ + fprintf(stderr, ", got "); \ + err(a); \ + fprintf(stderr, "\n"); \ + } \ + }) + +#define expect_vec(_a, _b) ({ \ + __typeof__(_a) a = (_a), b = (_b); \ + bool err = false; \ + size_t lanes = _Generic((a), \ + u8x16: 16, \ + i8x16: 16, \ + i16x8: 8, \ + i32x4: 4, \ + i64x2: 2, \ + f32x4: 4, \ + f64x2: 2); \ + for (size_t i = 0; i < lanes; i++) { \ + if (!eq(a[i], b[i])) { \ + err = true; \ + break; \ + } \ + } \ + if (err) { \ + failures++; \ + fprintf(stderr, "line %d: expected {", __LINE__); \ + for (size_t i = 0; i < lanes - 1; i++) { \ + err(b[i]); \ + fprintf(stderr, ", "); \ + } \ + err(b[lanes - 1]); \ + fprintf(stderr, "}, got {"); \ + for (size_t i = 0; i < lanes - 1; i++) { \ + err(a[i]); \ + fprintf(stderr, ", "); \ + } \ + err(a[lanes - 1]); \ + fprintf(stderr, "}\n"); \ + } \ + }) + +int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { + { + i8x16 vec = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; + expect_vec(i8x16_load(&vec), + ((i8x16){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})); + i8x16_store(&vec, (i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}); + expect_vec(i8x16_load(&vec), + ((i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7})); + } + expect_vec(i32x4_const(), ((i32x4){1, 2, 3, 4})); + expect_vec( + i8x16_shuffle_interleave_bytes( + (i8x16){1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0}, + (i8x16){0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16} + ), + ((i8x16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + ); + expect_vec(i32x4_shuffle_reverse((i32x4){1, 2, 3, 4}), ((i32x4){4, 3, 2, 1})); + + // i8x16 lane accesses + expect_vec(i8x16_splat(5), ((i8x16){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5})); + expect_vec(i8x16_splat(257), ((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); + expect_eq(i8x16_extract_lane_s_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), -1); + expect_eq(i8x16_extract_lane_s_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), -1); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i8x16_extract_lane_u_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 255); + expect_eq(i8x16_extract_lane_u_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), 255); +#endif // __wasm_unimplemented_simd128__ + expect_vec( + i8x16_replace_lane_first((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), + ((i8x16){7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + ); + expect_vec( + i8x16_replace_lane_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), + ((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}) + ); + + // i16x8 lane accesses + expect_vec(i16x8_splat(5), ((i16x8){5, 5, 5, 5, 5, 5, 5, 5})); + expect_vec(i16x8_splat(65537), ((i16x8){1, 1, 1, 1, 1, 1, 1, 1})); + expect_eq(i16x8_extract_lane_s_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), -1); + expect_eq(i16x8_extract_lane_s_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), -1); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i16x8_extract_lane_u_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), 65535); + expect_eq(i16x8_extract_lane_u_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), 65535); +#endif // __wasm_unimplemented_simd128__ + expect_vec(i16x8_replace_lane_first((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){7, 0, 0, 0, 0, 0, 0, 0})); + expect_vec(i16x8_replace_lane_last((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), 
((i16x8){0, 0, 0, 0, 0, 0, 0, 7})); + + // i32x4 lane accesses + expect_vec(i32x4_splat(-5), ((i32x4){-5, -5, -5, -5})); + expect_eq(i32x4_extract_lane_first((i32x4){-5, 0, 0, 0}), -5); + expect_eq(i32x4_extract_lane_last((i32x4){0, 0, 0, -5}), -5); + expect_vec(i32x4_replace_lane_first((i32x4){0, 0, 0, 0}, 53), ((i32x4){53, 0, 0, 0})); + expect_vec(i32x4_replace_lane_last((i32x4){0, 0, 0, 0}, 53), ((i32x4){0, 0, 0, 53})); + + // i64x2 lane accesses + expect_vec(i64x2_splat(-5), ((i64x2){-5, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i64x2_extract_lane_first((i64x2){-5, 0}), -5); + expect_eq(i64x2_extract_lane_last((i64x2){0, -5}), -5); + expect_vec(i64x2_replace_lane_first((i64x2){0, 0}, 53), ((i64x2){53, 0})); + expect_vec(i64x2_replace_lane_last((i64x2){0, 0}, 53), ((i64x2){0, 53})); +#endif // __wasm_unimplemented_simd128__ + + // f32x4 lane accesses + expect_vec(f32x4_splat(-5), ((f32x4){-5, -5, -5, -5})); + expect_eq(f32x4_extract_lane_first((f32x4){-5, 0, 0, 0}), -5); + expect_eq(f32x4_extract_lane_last((f32x4){0, 0, 0, -5}), -5); + expect_vec(f32x4_replace_lane_first((f32x4){0, 0, 0, 0}, 53), ((f32x4){53, 0, 0, 0})); + expect_vec(f32x4_replace_lane_last((f32x4){0, 0, 0, 0}, 53), ((f32x4){0, 0, 0, 53})); + + // f64x2 lane accesses + expect_vec(f64x2_splat(-5), ((f64x2){-5, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(f64x2_extract_lane_first((f64x2){-5, 0}), -5); + expect_eq(f64x2_extract_lane_last((f64x2){0, -5}), -5); + expect_vec(f64x2_replace_lane_first((f64x2){0, 0}, 53), ((f64x2){53, 0})); + expect_vec(f64x2_replace_lane_last((f64x2){0, 0}, 53), ((f64x2){0, 53})); +#endif // __wasm_unimplemented_simd128__ + + // i8x16 comparisons + expect_vec( + i8x16_eq( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){-1, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0}) + ); + expect_vec( + i8x16_ne( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1}) + ); + expect_vec( + i8x16_lt_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){0, 0, 0, -1, 0, -1, -1, 0, 0, 0, -1, 0, 0, -1, -1, 0}) + ); + expect_vec( + i8x16_lt_u( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1}) + ); + expect_vec( + i8x16_gt_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1}) + ); + expect_vec( + i8x16_gt_u( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0}) + ); + expect_vec( + i8x16_le_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){-1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, 0}) + ); + // 
bugs.chromium.org/p/v8/issues/detail?id=8635 + // expect_vec( + // i8x16_le_u( + // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + // ), + // ((i8x16){-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1}) + // ); + expect_vec( + i8x16_ge_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((i8x16){-1, -1, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, 0, -1}) + ); + // expect_vec( + // i8x16_ge_u( + // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + // ), + // ((i8x16){-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0}) + // ); + + // i16x8 comparisons + expect_vec( + i16x8_eq( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}) + ); + expect_vec( + i16x8_ne( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){0, -1, -1, -1, -1, -1, -1, -1}) + ); + expect_vec( + i16x8_lt_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){0, 0, 0, -1, 0, -1, 0, -1}) + ); + expect_vec( + i16x8_lt_u( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){0, 0, 0, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_gt_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){0, -1, -1, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_gt_u( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){0, -1, -1, -1, 0, -1, 0, -1}) + ); + expect_vec( + i16x8_le_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){-1, 0, 0, -1, 0, -1, 0, -1}) + ); + expect_vec( + i16x8_le_u( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){-1, 0, 0, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_ge_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){-1, -1, -1, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_ge_u( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((i16x8){-1, -1, -1, -1, 0, -1, 0, -1}) + ); + + // i342x4 comparisons + expect_vec( + i32x4_eq((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, 0, 0, 0}) + ); + expect_vec( + i32x4_ne((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, -1, -1, -1}) + ); + expect_vec( + i32x4_lt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, -1, 0, -1}) + ); + expect_vec( + i32x4_lt_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, 0, -1, -1}) + ); + expect_vec( + i32x4_gt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, 0, -1, 0}) + ); + expect_vec( + i32x4_gt_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, -1, 0, 0}) + ); + expect_vec( + i32x4_le_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, -1, 0, -1}) + ); + expect_vec( + 
i32x4_le_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, 0, -1, -1}) + ); + expect_vec( + i32x4_ge_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, 0, -1, 0}) + ); + expect_vec( + i32x4_ge_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, -1, 0, 0}) + ); + + // f32x4 comparisons + expect_vec( + f32x4_eq((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){-1, 0, 0, 0}) + ); + expect_vec( + f32x4_ne((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){0, -1, -1, -1}) + ); + expect_vec( + f32x4_lt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){0, -1, 0, -1}) + ); + expect_vec( + f32x4_gt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){0, 0, -1, 0}) + ); + expect_vec( + f32x4_le((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){-1, -1, 0, -1}) + ); + expect_vec( + f32x4_ge((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){-1, 0, -1, 0}) + ); + expect_vec( + f32x4_eq((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((i32x4){0, 0, 0, -1}) + ); + expect_vec( + f32x4_ne((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((i32x4){-1, -1, -1, 0}) + ); + expect_vec( + f32x4_lt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((i32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_gt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((i32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_le((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((i32x4){0, 0, 0, -1}) + ); + expect_vec( + f32x4_ge((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((i32x4){0, 0, 0, -1}) + ); + expect_vec( + f32x4_eq((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((i32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_ne((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((i32x4){-1, -1, -1, -1}) + ); + expect_vec( + f32x4_lt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((i32x4){-1, -1, 0, 0}) + ); + expect_vec( + f32x4_gt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((i32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_le((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((i32x4){-1, -1, 0, 0}) + ); + expect_vec( + f32x4_ge((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((i32x4){0, 0, 0, 0}) + ); + + // f64x2 comparisons + expect_vec(f64x2_eq((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){-1, 0})); + expect_vec(f64x2_ne((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){0, -1})); + expect_vec(f64x2_lt((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){0, 0})); + expect_vec(f64x2_gt((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){0, -1})); + expect_vec(f64x2_le((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){-1, 0})); + expect_vec(f64x2_ge((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){-1, -1})); + expect_vec(f64x2_eq((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, 0})); + expect_vec(f64x2_ne((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){-1, -1})); + expect_vec(f64x2_lt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, -1})); + expect_vec(f64x2_gt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, 0})); + expect_vec(f64x2_le((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, -1})); + expect_vec(f64x2_ge((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, 0})); + + // bitwise operations + expect_vec(i8x16_not((i8x16)(i32x4){0, -1, 0, -1}), (i8x16)((i32x4){-1, 0, -1, 0})); + expect_vec( + 
i8x16_and((i8x16)(i32x4){0, 0, -1, -1}, (i8x16)(i32x4){0, -1, 0, -1}), + (i8x16)((i32x4){0, 0, 0, -1}) + ); + expect_vec( + i8x16_or((i8x16)(i32x4){0, 0, -1, -1}, (i8x16)(i32x4){0, -1, 0, -1}), + (i8x16)((i32x4){0, -1, -1, -1}) + ); + expect_vec( + i8x16_xor((i8x16)(i32x4){0, 0, -1, -1}, (i8x16)(i32x4){0, -1, 0, -1}), + (i8x16)((i32x4){0, -1, -1, 0}) + ); + expect_vec( + i8x16_bitselect( + (i8x16)(i32x4){0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA}, + (i8x16)(i32x4){0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB}, + (i8x16)(i32x4){0xF0F0F0F0, 0xFFFFFFFF, 0x00000000, 0xFF00FF00} + ), + (i8x16)((i32x4){0xABABABAB, 0xAAAAAAAA, 0xBBBBBBBB, 0xAABBAABB}) + ); + + // i8x16 arithmetic + expect_vec( + i8x16_neg((i8x16){0, 1, 42, -3, -56, 127, -128, -126, 0, -1, -42, 3, 56, -127, -128, 126}), + ((i8x16){0, -1, -42, 3, 56, -127, -128, 126, 0, 1, 42, -3, -56, 127, -128, -126}) + ); + expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 1); + expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 0); + expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 0); + expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_vec( + i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), + ((i8x16){0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128}) + ); + expect_vec( + i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), + ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + ); + expect_vec( + i8x16_shr_u((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), + ((i8x16){0, 0, 1, 2, 4, 8, 16, 32, 64, 1, 3, 6, 12, 24, 48, 96}) + ); + expect_vec( + i8x16_shr_u((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), + ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + ); + expect_vec( + i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), + ((i8x16){0, 0, 1, 2, 4, 8, 16, 32, -64, 1, 3, 6, 12, 24, 48, -32}) + ); + expect_vec( + i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), + ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + ); + expect_vec( + i8x16_add( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){3, 17, 0, 0, 0, 135, 109, 46, 145, 225, 48, 184, 17, 249, 128, 215}) + ); + expect_vec( + i8x16_add_saturate_s( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){3, 17, 0, 128, 0, 135, 109, 46, 127, 225, 48, 184, 17, 249, 127, 215}) + ); + expect_vec( + i8x16_add_saturate_u( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){3, 255, 255, 255, 255, 135, 109, 46, 145, 225, 255, 184, 17, 255, 128, 215}) + ); + expect_vec( + i8x16_sub( + (i8x16){0, 
42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){253, 67, 254, 0, 254, 123, 159, 12, 61, 167, 158, 100, 17, 251, 130, 187}) + ); + expect_vec( + i8x16_sub_saturate_s( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){253, 67, 254, 0, 127, 128, 159, 12, 61, 167, 158, 128, 17, 251, 130, 127}) + ); + expect_vec( + i8x16_sub_saturate_u( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){0, 0, 254, 0, 0, 123, 0, 12, 61, 167, 158, 100, 17, 0, 0, 0}) + ); + expect_vec( + i8x16_mul( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){0, 230, 255, 0, 255, 6, 106, 237, 230, 52, 223, 76, 0, 6, 127, 126}) + ); + + // i16x8 arithmetic + expect_vec( + i16x8_neg((i16x8){0, 1, 42, -3, -56, 32767, -32768, 32766}), + ((i16x8){0, -1, -42, 3, 56, -32767, -32768, -32766}) + ); + expect_eq(i16x8_any_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i16x8_any_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 1); + expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 1); + expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i16x8_all_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i16x8_all_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 0); + expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 0); + expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_vec( + i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), + ((i16x8){0, 16, 32, 256, 512, 4096, 8192, 0}) + ); + expect_vec( + i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), + ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + ); + expect_vec( + i16x8_shr_u((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), + ((i16x8){0, 4, 8, 64, 128, 1024, 2048, 16384}) + ); + expect_vec( + i16x8_shr_u((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), + ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + ); + expect_vec( + i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), + ((i16x8){0, 4, 8, 64, 128, 1024, 2048, -16384}) + ); + expect_vec( + i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), + ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + ); + expect_vec( + i16x8_add( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){768, -255, 0, 0, -30976, 12288, -1792, -32768}) + ); + expect_vec( + i16x8_add_saturate_s( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){768, -255, -32768, 0, -30976, 12288, -1792, 32767}) + ); + expect_vec( + i16x8_add_saturate_u( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){768, -255, -1, -1, -30976, -1, -1, -32768}) + ); + expect_vec( + i16x8_sub( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){-768, -257, 0, -512, 31488, -25088, -1280, 32764}) + ); + expect_vec( + i16x8_sub_saturate_s( + (i16x8){0, -256, -32768, 32512, -32512, 
-6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){-768, -257, 0, 32767, -32768, -25088, -1280, 32764}) + ); + expect_vec( + i16x8_sub_saturate_u( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){0, -257, 0, 0, 31488, -25088, 0, 32764}) + ); + expect_vec( + i16x8_mul( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){0, -256, 0, 0, 0, 0, 0, -4}) + ); + + // i32x4 arithmetic + expect_vec(i32x4_neg((i32x4){0, 1, 0x80000000, 0x7fffffff}), ((i32x4){0, -1, 0x80000000, 0x80000001})); + expect_eq(i32x4_any_true((i32x4){0, 0, 0, 0}), 0); + expect_eq(i32x4_any_true((i32x4){0, 0, 1, 0}), 1); + expect_eq(i32x4_any_true((i32x4){1, 0, 1, 1}), 1); + expect_eq(i32x4_any_true((i32x4){1, 1, 1, 1}), 1); + expect_eq(i32x4_all_true((i32x4){0, 0, 0, 0}), 0); + expect_eq(i32x4_all_true((i32x4){0, 0, 1, 0}), 0); + expect_eq(i32x4_all_true((i32x4){1, 0, 1, 1}), 0); + expect_eq(i32x4_all_true((i32x4){1, 1, 1, 1}), 1); + expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){2, 0x80000000, 0, -2})); + expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); + expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0xc0000000, -1})); + expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); + expect_vec(i32x4_shr_u((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0x40000000, 0x7fffffff})); + expect_vec(i32x4_shr_u((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); + expect_vec(i32x4_add((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 5, 42}), ((i32x4){0, 2, 47, 47})); + expect_vec(i32x4_sub((i32x4){0, 2, 47, 47}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 0x80000001, 5, 42})); + expect_vec(i32x4_mul((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 1, 1764, 25})); + + + // i64x2 arithmetic + expect_vec(i64x2_neg((i64x2){0x8000000000000000, 42}), ((i64x2){0x8000000000000000, -42})); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i64x2_any_true((i64x2){0, 0}), 0); + expect_eq(i64x2_any_true((i64x2){1, 0}), 1); + expect_eq(i64x2_any_true((i64x2){1, 1}), 1); + expect_eq(i64x2_all_true((i64x2){0, 0}), 0); + expect_eq(i64x2_all_true((i64x2){1, 0}), 0); + expect_eq(i64x2_all_true((i64x2){1, 1}), 1); +#endif // __wasm_unimplemented_simd128__ + expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 1), ((i64x2){2, 0})); + expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); + expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0xc000000000000000})); + expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); + expect_vec(i64x2_shr_u((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0x4000000000000000})); + expect_vec(i64x2_shr_u((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); + expect_vec(i64x2_add((i64x2){0x8000000000000001, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){2, 42})); + expect_vec(i64x2_sub((i64x2){2, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){0x8000000000000001, 42})); + + // f32x4 arithmetic + expect_vec(f32x4_abs((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, NAN, INFINITY, 5})); + expect_vec(f32x4_neg((f32x4){-0., NAN, -INFINITY, 
5}), ((f32x4){0, -NAN, INFINITY, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f32x4_sqrt((f32x4){-0., NAN, INFINITY, 4}), ((f32x4){-0., NAN, INFINITY, 2})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f32x4_add((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 43})); + expect_vec(f32x4_sub((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, -INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 41})); + expect_vec(f32x4_mul((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 2}), ((f32x4){NAN, -NAN, INFINITY, 84})); + expect_vec(f32x4_div((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, 2, 2}), ((f32x4){NAN, -NAN, INFINITY, 21})); + // expect_vec(f32x4_min((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){-0., -0., NAN, NAN})); + // expect_vec(f32x4_max((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){0, 0, NAN, NAN})); + + // f64x2 arithmetic +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_abs((f64x2){-0., NAN}), ((f64x2){0, NAN})); + expect_vec(f64x2_abs((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, 5})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f64x2_neg((f64x2){-0., NAN}), ((f64x2){0, -NAN})); + expect_vec(f64x2_neg((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_sqrt((f64x2){-0., NAN}), ((f64x2){-0., NAN})); + expect_vec(f64x2_sqrt((f64x2){INFINITY, 4}), ((f64x2){INFINITY, 2})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f64x2_add((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_add((f64x2){INFINITY, 42}, (f64x2){INFINITY, 1}), ((f64x2){INFINITY, 43})); + expect_vec(f64x2_sub((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_sub((f64x2){INFINITY, 42}, (f64x2){-INFINITY, 1}), ((f64x2){INFINITY, 41})); + expect_vec(f64x2_mul((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_mul((f64x2){INFINITY, 42}, (f64x2){INFINITY, 2}), ((f64x2){INFINITY, 84})); + expect_vec(f64x2_div((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_div((f64x2){INFINITY, 42}, (f64x2){2, 2}), ((f64x2){INFINITY, 21})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_min((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){-0., -0})); + expect_vec(f64x2_min((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); + expect_vec(f64x2_max((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){0, 0})); + expect_vec(f64x2_max((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); +#endif // __wasm_unimplemented_simd128__ + + // conversions + expect_vec(i32x4_trunc_s_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 2147483647, -2147483648ll})); + expect_vec(i32x4_trunc_u_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 4294967295ull, 0})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); + expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){9223372036854775807ll, -9223372036854775807ll - 1})); + expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); + expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){18446744073709551615ull, 0})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f32x4_convert_s_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, -1, 2147483648., -2147483648.})); + 
expect_vec(f32x4_convert_u_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, 4294967296., 2147483648., 2147483648.})); + expect_vec(f64x2_convert_s_i64x2((i64x2){0, -1}), ((f64x2){0, -1})); + expect_vec(f64x2_convert_s_i64x2((i64x2){9223372036854775807, -9223372036854775807 - 1}), ((f64x2){9223372036854775807., -9223372036854775808.})); + expect_vec(f64x2_convert_u_i64x2((i64x2){0, -1}), ((f64x2){0, 18446744073709551616.})); + expect_vec(f64x2_convert_u_i64x2((i64x2){9223372036854775807 , -9223372036854775808.}), ((f64x2){9223372036854775807., 9223372036854775808.})); + + if (failures == 0) { + printf("Success!\n"); + } else { + printf("Failed :(\n"); + } +} From 1c515fcac1c0612789db6f9f289e715844ec5be2 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 6 May 2019 21:39:02 -0700 Subject: [PATCH 02/16] Get intrinsic tests compiling with -msimd128 --- system/include/simd128.h | 827 +++++----- tests/test_core.py | 1 + tests/test_wasm_intrinsics_simd.c | 2562 ++++++++++++++--------------- 3 files changed, 1658 insertions(+), 1732 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 0533644300fe7..6ddeb07c13923 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -1,7 +1,10 @@ /* -Proposed WebAssembly SIMD Instructions 1 April 2019 +WebAssembly SIMD128 Intrinsics */ -#include //rrw where to get uintx_t's from? + +#include + +// User-facing types typedef int8_t v128 __attribute__((__vector_size__(16))); typedef int8_t i8x16 __attribute__((__vector_size__(16))); typedef uint8_t u8x16 __attribute__((__vector_size__(16))); @@ -14,18 +17,15 @@ typedef uint64_t u64x2 __attribute__((__vector_size__(16))); typedef float f32x4 __attribute__((__vector_size__(16))); typedef double f64x2 __attribute__((__vector_size__(16))); -#define __DEFAULT_FN_VIS_ATTRS \ - __attribute__((used)) __attribute__((visibility("default"))) -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +// Internal types +typedef char __v128 __attribute__((__vector_size__(16))); +typedef char __i8x16 __attribute__((__vector_size__(16))); +typedef short __i16x8 __attribute__((__vector_size__(16))); +typedef int __i32x4 __attribute__((__vector_size__(16))); +typedef long long __i64x2 __attribute__((__vector_size__(16))); -// v128 wasm_v128_constant(...) 
-static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c15,
-    int8_t c14, int8_t c13, int8_t c12, int8_t c11, int8_t c10,
-    int8_t c9, int8_t c8, int8_t c7, int8_t c6, int8_t c5,
-    int8_t c4, int8_t c3, int8_t c2, int8_t c1, int8_t c0) {
-  return __extension__(i8x16){c0, c1, c2, c3, c4, c5, c6, c7,
-                              c8, c9, c10, c11, c12, c13, c14, c15};
-}
+
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 
 // v128 wasm_v128_load(v128* mem)
 static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_v128_load(i8x16* mem) {
@@ -38,47 +38,40 @@ static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(v128* mem, v128 a) {
   return;
 }
 
-// i8x16 wasm_i8x16_splat(int8_t a)
-static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) {
-  return __extension__(i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a};
-}
-
-// i16x8 wasm_i16x8_splat(int16_t a)
-static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) {
-  return __extension__(i16x8){a, a, a, a, a, a, a, a};
-}
-
-// i32x4 wasm_i32x4_splat(int32_t a)
-static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) {
-  return __extension__(i32x4){a, a, a, a};
-}
-
-// i64x2 wasm_i64x2_splat(int64_t a)
-static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) {
-  return __extension__(i64x2){a, a};
-}
-
-// f32x4 wasm_f32x4_splat(float a)
-static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) {
-  return __extension__(f32x4){a, a, a, a};
+// v128 wasm_v128_constant(...)
+static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c15,
+    int8_t c14, int8_t c13, int8_t c12, int8_t c11, int8_t c10,
+    int8_t c9, int8_t c8, int8_t c7, int8_t c6, int8_t c5,
+    int8_t c4, int8_t c3, int8_t c2, int8_t c1, int8_t c0) {
+  return (i8x16){c0, c1, c2, c3, c4, c5, c6, c7,
+                 c8, c9, c10, c11, c12, c13, c14, c15};
 }
 
-// f64x2 wasm_f64x2_splat(double a)
-static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) {
-  return __extension__(f64x2){a, a};
+// i8x16 wasm_i8x16_splat(int8_t a)
+static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) {
+  return (i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a};
 }
 
 // int8_t wasm_i8x16_extract_lane(i8x16 a, imm)
-#define wasm_i8x16_extract_lane(a, b) \
-  (__builtin_wasm_extract_lane_s_i8x16(a, b))
+#define wasm_i8x16_extract_lane(a, b) \
+  (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(a), b))
 
 // int8_t wasm_u8x16_extract_lane(u8x16 a, imm)
 #define wasm_u8x16_extract_lane(a, b) \
-  (__builtin_wasm_extract_lane_u_i8x16(a, b))
+  (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(a), b))
+
+// i8x16 wasm_i8x16_replace_lane(i8x16 a, imm i, int8_t b)
+#define wasm_i8x16_replace_lane(a, i, b) \
+  ((i8x16)__builtin_wasm_replace_lane_i8x16((__i8x16)(a), i, b))
+
+// i16x8 wasm_i16x8_splat(int16_t a)
+static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) {
+  return (i16x8){a, a, a, a, a, a, a, a};
+}
 
 // int16_t wasm_i16x8_extract_lane(i16x8 a, imm)
 #define wasm_i16x8_extract_lane(a, b) \
-  (__builtin_wasm_extract_lane_s_i16x8(a, b))
+  (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(a), b))
 
 #ifdef __wasm_unimplemented_simd128__
 // int16_t wasm_u16x8_extract_lane(u16x8 a, imm)
@@ -86,560 +79,620 @@ static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) {
   (__builtin_wasm_extract_lane_u_i16x8(a, b))
 #endif
 
-// int32_t wasm_i32x4_extract_lane(i32x4 a, imm)
-#define wasm_i32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_i32x4(a, b))
-
-// int64_t wasm_i64x2_extract_lane(i8x16, imm)
-#define wasm_i64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_i64x2(a, b))
-
-// float wasm_f32x4_extract_lane(f32x4, imm)
-#define wasm_f32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_f32x4(a, b))
-
-// double __builtin_wasm_extract_lane_f64x2(f64x2, imm)
-#ifdef __wasm_undefined_simd128__
-#define wasm_f64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_f64x2(a, b))
-#endif
-
-// i8x16 wasm_i8x16_replace_lane(i8x16 a, imm i, int8_t b)
-#define wasm_i8x16_replace_lane(a, i, b) \
-  (__builtin_wasm_replace_lane_i8x16(a, i, b))
-
 // i16x8 wasm_i16x8_replace_lane(i16x8 a, imm i, int16_t b)
 #define wasm_i16x8_replace_lane(a, i, b) \
-  (__builtin_wasm_replace_lane_i16x8(a, i, b))
+  ((i16x8)__builtin_wasm_replace_lane_i16x8((__i16x8)(a), i, b))
+
+// i32x4 wasm_i32x4_splat(int32_t a)
+static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) {
+  return (i32x4){a, a, a, a};
+}
+
+// int32_t wasm_i32x4_extract_lane(i32x4 a, imm)
+#define wasm_i32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_i32x4(a, b))
 
 // i32x4 wasm_i32x4_replace_lane(i32x4 a, imm i, int32_t b)
 #define wasm_i32x4_replace_lane(a, i, b) \
-  (__builtin_wasm_replace_lane_i32x4(a, i, b))
+  ((i32x4)__builtin_wasm_replace_lane_i32x4((__i32x4)(a), i, b))
+
+// i64x2 wasm_i64x2_splat(int64_t a)
+static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) {
+  return (i64x2){a, a};
+}
+
+// int64_t wasm_i64x2_extract_lane(i8x16, imm)
+#define wasm_i64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_i64x2(a, b))
 
 // i8x16 wasm_i64x2_replace_lane(i8x16 a, imm i, int64_t b)
 #define wasm_i64x2_replace_lane(a, i, b) \
-  (__builtin_wasm_replace_lane_i64x2(a, i, b))
+  ((i64x2)__builtin_wasm_replace_lane_i64x2((__i64x2)(a), i, b))
+
+// f32x4 wasm_f32x4_splat(float a)
+static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) {
+  return (f32x4){a, a, a, a};
+}
+
+// float wasm_f32x4_extract_lane(f32x4, imm)
+#define wasm_f32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_f32x4(a, b))
 
 // f32x4 wasm_f32x4_replace_lane(f32x4 a, imm i, float b)
 #define wasm_f32x4_replace_lane(a, i, b) \
   (__builtin_wasm_replace_lane_f32x4(a, i, b))
 
-#ifdef __wasm_unimplemented_simd128__
+#ifdef __wasm_undefined_simd128__
+
+// f64x2 wasm_f64x2_splat(double a)
+static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) {
+  return (f64x2){a, a};
+}
+
+// double __builtin_wasm_extract_lane_f64x2(f64x2, imm)
+#define wasm_f64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_f64x2(a, b))
+
 // f64x2 wasm_f64x4_replace_lane(f64x2 a, imm i, double b)
 #define wasm_f64x2_replace_lane(a, i, b) \
   (__builtin_wasm_replace_lane_f64x2(a, i, b))
-#endif
-// i8x16 wasm_i8x16_add(i8x16 a i8x16 b)
-static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_add(i8x16 a, i8x16 b) {
-  return __extension__(i8x16){a + b};
-}
+#endif // __wasm_undefined_simd128__
 
-// i16x8 wasm_i16x8_add(i16x8 a i16x8 b)
-static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_add(i16x8 a, i16x8 b) {
-  return __extension__(i16x8){a + b};
+// i8x16 wasm_i8x16_eq(i8x16 a, i8x16 b)
+static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_eq(i8x16 a, i8x16 b) {
+  return (u8x16)(a == b);
 }
 
-// i32x4 wasm_i32x4_add(i32x4 a i32x4 b)
-static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_add(i32x4 a, i32x4 b) {
-  return __extension__(i32x4){a + b};
+// i8x16 wasm_i8x16_ne(i8x16 a, i8x16 b)
+static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_ne(i8x16 a, i8x16 b) {
+  return (u8x16)(a != b);
 }
 
-// i8x16 wasm_i64x2_add(i8x16 a i8x16 b)
-static __inline__ i64x2
__DEFAULT_FN_ATTRS wasm_i64x2_add(i64x2 a, i64x2 b) { - return __extension__(i64x2){a + b}; +// i8x16 wasm_i8x16_lt(i8x16 a, i8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_lt(i8x16 a, i8x16 b) { + return (u8x16)(a < b); } -// f32x4 wasm_f32x4_add(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_add(f32x4 a, f32x4 b) { - return __extension__(f32x4){a + b}; +// i8x16 wasm_u8x16_lt(u8x16 a, u8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_lt(u8x16 a, u8x16 b) { + return (u8x16)(a < b); } -// i8x16 wasm_i8x16_sub(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_sub(i8x16 a, i8x16 b) { - return __extension__(i8x16){a - b}; +// i8x16 wasm_i8x16_gt(i8x16 a, i8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_gt(i8x16 a, i8x16 b) { + return (u8x16)(a > b); } -// i16x8 wasm_i16x8_sub(i16x8 a i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_sub(i16x8 a, i16x8 b) { - return __extension__(i16x8){a - b}; +// i8x16 wasm_u8x16_gt(u8x16 a, u8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_gt(u8x16 a, u8x16 b) { + return (u8x16)(a > b); } -// i32x4 wasm_i32x4_sub(i32x4 a i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_sub(i32x4 a, i32x4 b) { - return __extension__(i32x4){a - b}; +// i8x16 wasm_i8x16_le(i8x16 a, i8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_le(i8x16 a, i8x16 b) { + return (u8x16)(a <= b); } -// i64x2 wasm_i64x2_sub(i64x2 a i64x2 b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_sub(i64x2 a, i64x2 b) { - return __extension__(i64x2){a - b}; +// i8x16 wasm_i8x16_le(u8x16 a, u8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_le(u8x16 a, u8x16 b) { + return (u8x16)(a <= b); } -// f32x4 wasm_f32x4_sub(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_sub(f32x4 a, f32x4 b) { - return __extension__(f32x4){a - b}; +// i8x16 wasm_i8x16_ge(i8x16 a, i8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_ge(i8x16 a, i8x16 b) { + return (u8x16)(a >= b); } -// i8x16 wasm_i8x16_mul(i8x16 a i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_mul(i8x16 a, i8x16 b) { - return __extension__(i8x16){a * b}; +// i8x16 wasm_u8x16_ge(u8x16 a, u8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_ge(u8x16 a, u8x16 b) { + return (u8x16)(a >= b); } -// i16x8 wasm_i16x8_mul(i16x8 a i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_mul(i16x8 a, i16x8 b) { - return __extension__(i16x8){a * b}; +// i16x8 wasm_i16x8_eq(i16x8 a, i16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_eq(i16x8 a, i16x8 b) { + return (u16x8)(a == b); } -// i32x4 wasm_i32x4_mul(i32x4 a i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_mul(i32x4 a, i32x4 b) { - return __extension__(i32x4){a * b}; +// i16x8 wasm_i16x8_ne(i16x8 a, i32x4 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_ne(i16x8 a, i16x8 b) { + return (u16x8)(a != b); } - -// i64x2 wasm_i64x2_mul(i64x2 a i64x2 b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_mul(i64x2 a, i64x2 b) { - return __extension__(i64x2){a * b}; +// i16x8 wasm_i16x8_lt(i16x8 a, i16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_lt(i16x8 a, i16x8 b) { + return (u16x8)(a < b); } -// f32x4 wasm_f32x4_mul(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_mul(f32x4 a, f32x4 b) { - return __extension__(f32x4){a * b}; +// i16x8 wasm_u16x8_lt(u8x16 a, u8x16 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS 
wasm_u16x8_lt(u16x8 a, u16x8 b) { + return (u16x8)(a < b); } -// i8x16 wasm_i8x16_neg(i8x16 a) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_neg(i8x16 a) { - return __extension__(i8x16){-a}; +// i16x8 wasm_i16x8_gt(i16x8 a, i16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_gt(i16x8 a, i16x8 b) { + return (u16x8)(a > b); } -// i16x8 wasm_i16x8_neg(i16x8 a) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_neg(i16x8 a) { - return __extension__(i16x8){-a}; +// i16x8 wasm_u16x8_gt(u8x16 a, u8x16 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_gt(u16x8 a, u16x8 b) { + return (u16x8)(a > b); } -// i32x4 wasm_i32x4_neg(i32x4 a) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_neg(i32x4 a) { - return __extension__(i32x4){-a}; +// i16x8 wasm_i16x8_le(i16x8 a, i16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_le(i16x8 a, i16x8 b) { + return (u16x8)(a <= b); } -// i64x2 wasm_i64x2_neg(i64x2 a) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_neg(i64x2 a) { - return __extension__(i64x2){-a}; +// i16x8 wasm_i16x8_le(u8x16 a, u8x16 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_le(u16x8 a, u16x8 b) { + return (u16x8)(a <= b); } -// f32x4 wasm_f32x4_neg(f32x4 a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_neg(f32x4 a) { - return __extension__(f32x4){-a}; +// i16x8 wasm_i16x8_ge(i16x8 a, i16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_ge(i16x8 a, i16x8 b) { + return (u16x8)(a >= b); } -// f64x2 wasm_f64x2_neg(f64x2 a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_neg(f64x2 a) { - return __extension__(f64x2){-a}; +// i16x8 wasm_i16x8_ge(u16x8 a, u16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_ge(u16x8 a, u16x8 b) { + return (u16x8)(a >= b); } -// i8x16 wasm_add_saturate(i8x16 a, i8x16 b) -#define wasm_i8x16_add_saturate(a, b) \ - (__builtin_wasm_add_saturate_s_i8x16(a, b)) +// i32x4 wasm_i32x4_eq(i32x4 a, i32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_eq(i32x4 a, i32x4 b) { + return (u32x4)(a == b); +} -// u8x16 wasm_add_saturate(u8x16 a, u8x16 b) -#define wasm_u8x16_add_saturate(a, b) \ - (__builtin_wasm_add_saturate_u_i8x16(a, b)) +// i32x4 wasm_i32x4_ne(i32x4 a, i32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_ne(i32x4 a, i32x4 b) { + return (u32x4)(a != b); +} -// i16x8 wasm_add_saturate(i16x8 a, i16x8 b) -#define wasm_i16x8_add_saturate(a, b) \ - (__builtin_wasm_add_saturate_s_i16x8(a, b)) +// i32x4 wasm_i32x4_lt(i32x4 a, i32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_lt(i32x4 a, i32x4 b) { + return (u32x4)(a < b); +} -// u16x8 wasm_add_saturate(u16x8 a, u16x8 b) -#define wasm_u16x8_add_saturate(a, b) \ - (__builtin_wasm_add_saturate_u_i16x8(a, b)) +// u32x4 wasm_u32x4_lt(u32x4 a, u32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_lt(u32x4 a, u32x4 b) { + return (u32x4)(a < b); +} -// i8x16 wasm_sub_saturate(i8x16 a, i8x16 b) -#define wasm_i8x16_sub_saturate(a, b) \ - (__builtin_wasm_sub_saturate_s_i8x16(a, b)) +// i32x4 wasm_i32x4_gt(i32x4 a, i32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_gt(i32x4 a, i32x4 b) { + return (u32x4)(a > b); +} -// u8x16 wasm_sub_saturate(u8x16 a, u8x16 b) -#define wasm_u8x16_sub_saturate(a, b) \ - (__builtin_wasm_sub_saturate_u_i8x16(a, b)) +// i32x4 wasm_i32x4_gt(u32x4 a, u32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_gt(u32x4 a, u32x4 b) { + return (u32x4)(a > b); +} -// i16x8 wasm_sub_saturate(i16x8 a, i16x8 b) -#define wasm_i16x8_sub_saturate(a, b) 
\ - (__builtin_wasm_sub_saturate_s_i16x8(a, b)) +// i32x4 wasm_i32x4_le(i32x4 a, i32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_le(i32x4 a, i32x4 b) { + return (u32x4)(a <= b); +} -// u16x8 wasm_sub_saturate(u16x8 a, u16x8 b) -#define wasm_u16x8_sub_saturate(a, b) \ - (__builtin_wasm_sub_saturate_u_i16x8(a, b)) +// i32x4 wasm_u32x4_le(u32x4 a, u32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_le(u32x4 a, u32x4 b) { + return (u32x4)(a <= b); +} -// i8x16 wasm_i8x16_shl(i8x16 a, int32_t b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shl(i8x16 a, int32_t b) { - return __extension__(i8x16){a << b}; +// i32x4 wasm_i32x4_ge(i32x4 a, i32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_ge(i32x4 a, i32x4 b) { + return (u32x4)(a >= b); } -// i16x8 wasm_i16x8_shl(i16x8 a, int32_t b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shl(i16x8 a, int32_t b) { - return __extension__(i16x8){a << b}; +// i32x4 wasm_u32x4_ge(u32x4 a, u32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_ge(u32x4 a, u32x4 b) { + return (u32x4)(a >= b); } -// i32x4 wasm_i32x4_shl(i32x4 a, int32_t b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shl(i32x4 a, int32_t b) { - return __extension__(i32x4){a << b}; +// i32x4 wasm_f32x4_eq(f32x4 a f32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_eq(f32x4 a, f32x4 b) { + return (u32x4)(a == b); } -// i64x2 wasm_i64x2_shl(i64x2 a, int32_t b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shl(i64x2 a, int32_t b) { - return __extension__(i64x2){a << b}; +// i32x4 wasm_f32x4_ne(f32x4 a, f32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_ne(f32x4 a, f32x4 b) { + return (u32x4)(a != b); } -// i8x16 wasm_i8x64_shr(i8x16 a, int32_t b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shr(i8x16 a, int32_t b) { - return __extension__(i8x16){a >> b}; +// i32x4 wasm_f32x4_lt(f32x4 a, f32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_lt(f32x4 a, f32x4 b) { + return (u32x4)(a < b); } -// u8x16 wasm_u8x16_shr(u8x16 a int32_t b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_shr(u8x16 a, int32_t b) { - return __extension__(u8x16){a >> b}; +// f32x4 wasm_f32x4_gt(f32x4 a, f32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_gt(f32x4 a, f32x4 b) { + return (u32x4)(a > b); } -// i16x8 wasm_i16x8_shr(i16x8 a, int32_t b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shr(i16x8 a, int32_t b) { - return __extension__(i16x8){a >> b}; +// i32x4 wasm_f32x4_le(f32x4 a, f32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_le(f32x4 a, f32x4 b) { + return (u32x4)(a <= b); } -// u16x8 wasm_u16x8_shr(u16x8 a, int32_t b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_shr(u16x8 a, int32_t b) { - return __extension__(u16x8){a >> b}; +// i32x4 wasm_f32x4_ge(f32x4 a, f32x4 b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_ge(f32x4 a, f32x4 b) { + return (u32x4)(a >= b); } -// i32x4 wasm_i32x4_shr(i32x4 a, int32_t b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shr(i32x4 a, int32_t b) { - return __extension__(i32x4){a >> b}; +#ifdef __wasm_undefined_simd128__ + +// i64x2 wasm_f64x2_eq(f64x2 a, f64x2 b) +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_eq(f64x2 a, f64x2 b) { + return (u64x2)(a == b); } -// u32x4 wasm_u32x4_shr(u32x4 a, int32_t b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_shr(u32x4 a, int32_t b) { - return __extension__(u32x4){a >> b}; +// i64x2 wasm_f64x2_ne(f64x2 a, f64x2 b) +static __inline__ 
u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ne(f64x2 a, f64x2 b) { + return (u64x2)(a != b); } -// i64x2 wasm_i64x2_shr(i64x2 a, int32_t b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shr(i8x16 a, int32_t b) { - return __extension__(i64x2){a >> b}; +// i64x2 wasm_f64x2_lt(f64x2 a, f64x2 b) +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_lt(f64x2 a, f64x2 b) { + return (u64x2)(a < b); } -// u64x2 wasm_u64x2_shr_u(u64x2 a, int32_t b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_u64x2_shr(u64x2 a, int32_t b) { - return __extension__(u64x2){a >> b}; +// i64x2 wasm_f64x2_gt(f64x2 a, f64x2 b) +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_gt(f64x2 a, f64x2 b) { + return (u64x2)(a > b); } -// i8x16 wasm_i8x16_and(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_and(i8x16 a, i8x16 b) { - return __extension__(i8x16){a & b}; +// i64x2 wasm_f64x2_le(f64x2 a, f64x2 b) +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_le(f64x2 a, f64x2 b) { + return (u64x2)(a <= b); } -// i8x16 wasm_i8x16_or(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_or(i8x16 a, i8x16 b) { - return __extension__(i8x16){a | b}; +// i64x2 wasm_f64x2_ge(f64x2 a, f64x2 b) +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ge(f64x2 a, f64x2 b) { + return (u64x2)(a >= b); } -// i8x16 wasm_i8x16_xor(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_xor(i8x16 a, i8x16 b) { - return __extension__(i8x16){a ^ b}; +#endif // __wasm_undefined_simd128__ + +// v128 wasm_v128_not(v128 a) +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_not(v128 a) { + return ~a; } -// i8x16 wasm_i8x16_not(i8x16 a) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_not(i8x16 a) { - return __extension__(i8x16){~a}; +// v128 wasm_v128_and(v128 a, v128 b) +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_and(v128 a, v128 b) { + return a & b; } -// i8x16 wasm_i8x16_bitselect(i8x16 a, i8x16 b, i8x16 c) -#define wasm_i8x16_bitselect(a, b, c) (__builtin_wasm_bitselect(a, b, c)) +// v128 wasm_v128_or(v128 a, v128 b) +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_or(v128 a, v128 b) { + return a | b; +} -// bool wasm_i8x16_any_true(i8x16 a) -#define wasm_i8x16_any_true(a) (__builtin_wasm_any_true_i8x16(a)) +// v128 wasm_v128_xor(v128 a, v128 b) +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_xor(v128 a, v128 b) { + return a ^ b; +} -// bool wasm_i16x8_any_true(i16x8 a) -#define wasm_i16x8_any_true(a) (__builtin_wasm_any_true_i16x8(a)) +// v128 wasm_v128_bitselect(v128 a, v128 b, v128 c) +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128 a, v128 b, v128 c) { + return (v128)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b, (__i32x4)c); +} -// bool wasm_i32x4_any_true(i32x4 a) -#define wasm_i32x4_any_true(a) (__builtin_wasm_any_true_i32x4(a)) +// i8x16 wasm_i8x16_neg(i8x16 a) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_neg(i8x16 a) { + return -a; +} -#ifdef __wasm_undefined_simd128__ -// bool wasm_i64x2_any_true(i64x2 a) -#define wasm_i64x2_any_true(a) (__builtin_wasm_any_true_i64x2(a)) -#endif +// bool wasm_i8x16_any_true(i8x16 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_any_true(i8x16 a) { + return __builtin_wasm_any_true_i8x16((__i8x16)a); +} // bool wasm_i8x16_all_true(i8x16 a) -#define wasm_i8x16_all_true(a) (__builtin_wasm_all_true_i8x16(a)) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(i8x16 a) { + return __builtin_wasm_all_true_i8x16((__i8x16)a); +} -// bool wasm_i16x8_all_true(i16x8 a) 
-#define wasm_i16x8_all_true(a) (__builtin_wasm_all_true_i16x8(a)) +// i8x16 wasm_i8x16_shl(i8x16 a, int32_t b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shl(i8x16 a, int32_t b) { + return a << b; +} -// bool wasm_i32x4_all_true(i32x4 a) -#define wasm_i32x4_all_true(a) (__builtin_wasm_all_true_i32x4(a)) +// i8x16 wasm_i8x64_shr(i8x16 a, int32_t b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shr(i8x16 a, int32_t b) { + return a >> b; +} -// bool wasm_i64x2_all_true(i64x2 a) -#define wasm_i64x2_all_true(a) (__builtin_wasm_all_true_i64x2(a)) +// u8x16 wasm_u8x16_shr(u8x16 a int32_t b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_shr(u8x16 a, int32_t b) { + return a >> b; +} -// i8x16 wasm_i8x16_eq(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_eq(i8x16 a, i8x16 b) { - return __extension__(i8x16){a == b}; +// i8x16 wasm_i8x16_add(i8x16 a i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_add(i8x16 a, i8x16 b) { + return a + b; } -// i16x8 wasm_i16x8_eq(i16x8 a, i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_eq(i16x8 a, i16x8 b) { - return __extension__(i16x8){a == b}; +// i8x16 wasm_add_saturate(i8x16 a, i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_add_saturate(i8x16 a, i8x16 b) { + return (i8x16) __builtin_wasm_add_saturate_s_i8x16((__i8x16)a, (__i8x16)b); } -// i32x4 wasm_i32x4_eq(i32x4 a, i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_eq(i32x4 a, i32x4 b) { - return __extension__(i32x4){a == b}; +// u8x16 wasm_add_saturate(u8x16 a, u8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_add_saturate(u8x16 a, u8x16 b) { + return (u8x16)__builtin_wasm_add_saturate_u_i8x16((__i8x16)a, (__i8x16)b); } -#ifdef __wasm_undefined_simd128__ -// i32x4 wasm_f32x4_eq(f32x4 a f32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_f32x4_eq(f32x4 a, f32x4 b) { - return __extension__(f32x4){a == b}; +// i8x16 wasm_i8x16_sub(i8x16 a, i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_sub(i8x16 a, i8x16 b) { + return a - b; } -#endif -#ifdef __wasm_undefined_simd128__ -// i64x2 wasm_f64x2_eq(f64x2 a, f64x2 b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_f64x2_eq(f64x2 a, f64x2 b) { - return __extension__(f64x2){a == b}; +// i8x16 wasm_sub_saturate(i8x16 a, i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_sub_saturate(i8x16 a, i8x16 b) { + return (i8x16)__builtin_wasm_sub_saturate_s_i8x16((__i8x16)a, (__i8x16)b); } -#endif -// i8x16 wasm_i8x16_ne(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_ne(i8x16 a, i8x16 b) { - return __extension__(i8x16){a != b}; +// u8x16 wasm_sub_saturate(u8x16 a, u8x16 b) +static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_sub_saturate(u8x16 a, u8x16 b) { + return (u8x16)__builtin_wasm_sub_saturate_u_i8x16((__i8x16)a, (__i8x16)b); } -// i16x8 wasm_i16x8_ne(i16x8 a, i32x4 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_ne(i16x8 a, i16x8 b) { - return __extension__(i16x8){a != b}; +// i8x16 wasm_i8x16_mul(i8x16 a i8x16 b) +static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_mul(i8x16 a, i8x16 b) { + return a * b; } -// i32x4 wasm_i32x4_ne(i32x4 a, i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_ne(i32x4 a, i32x4 b) { - return __extension__(i32x4){a != b}; +// i16x8 wasm_i16x8_neg(i16x8 a) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_neg(i16x8 a) { + return -a; } -// i32x4 wasm_f32x4_ne(f32x4 a, f32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS 
wasm_f32x4_ne(f32x4 a, f32x4 b) { - return __extension__(i32x4){a != b}; +// bool wasm_i16x8_any_true(i16x8 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_any_true(i16x8 a) { + return __builtin_wasm_any_true_i16x8((__i16x8)a); } -// i64x2 wasm_f64x2_ne(f64x2 a, f64x2 b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ne(f64x2 a, f64x2 b) { - return __extension__(i64x2){a != b}; +// bool wasm_i16x8_all_true(i16x8 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(i16x8 a) { + return __builtin_wasm_all_true_i16x8((__i16x8)a); } -// i8x16 wasm_i8x16_lt(i8x16 a, i8x16 b) -static __inline__ i8x16 wasm_i8x16_lt(i8x16 a, i8x16 b) { - return __extension__(i8x16){a < b}; +// i16x8 wasm_i16x8_shl(i16x8 a, int32_t b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shl(i16x8 a, int32_t b) { + return a << b; } -// i8x16 wasm_u8x16_lt(u8x16 a, u8x16 b) -static __inline__ i8x16 wasm_u8x16_lt(u8x16 a, u8x16 b) { - return __extension__(u8x16){a < b}; +// i16x8 wasm_i16x8_shr(i16x8 a, int32_t b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shr(i16x8 a, int32_t b) { + return a >> b; } -// i16x8 wasm_i16x8_lt(i16x8 a, i16x8 b) -static __inline__ i16x8 wasm_i16x8_lt(i16x8 a, i16x8 b) { - return __extension__(i16x8){a < b}; +// u16x8 wasm_u16x8_shr(u16x8 a, int32_t b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_shr(u16x8 a, int32_t b) { + return a >> b; } -// i16x8 wasm_u16x8_lt(u8x16 a, u8x16 b) -static __inline__ i16x8 wasm_u16x8_lt(u16x8 a, u16x8 b) { - return __extension__(u16x8){a < b}; +// i16x8 wasm_i16x8_add(i16x8 a i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_add(i16x8 a, i16x8 b) { + return a + b; } -// i32x4 wasm_i32x4_lt(i32x4 a, i32x4 b) -static __inline__ i32x4 wasm_i32x4_lt(i32x4 a, i32x4 b) { - return __extension__(i32x4){a < b}; +// i16x8 wasm_add_saturate(i16x8 a, i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_add_saturate(i16x8 a, i16x8 b) { + return (i16x8) __builtin_wasm_add_saturate_s_i16x8((__i16x8)a, (__i16x8)b); } -// i32x4 wasm_u32x4_lt(u32x4 a, u32x4 b) -static __inline__ i32x4 wasm_i32x4_lt(u32x4 a, u32x4 b) { - return __extension__(u32x4){a < b}; +// u16x8 wasm_add_saturate(u16x8 a, u16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_add_saturate(u16x8 a, u16x8 b) { + return (u16x8)__builtin_wasm_add_saturate_u_i16x8((__i16x8)a, (__i16x8)b); } -// i32x4 wasm_f32x4_lt(f32x4 a, f32x4 b) -static __inline__ i32x4 wasm_f32x4_lt(f32x4 a, f32x4 b) { - return __extension__(f32x4){a < b}; +// i16x8 wasm_i16x8_sub(i16x8 a i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_sub(i16x8 a, i16x8 b) { + return a - b; } -// i64x2 wasm_f64x2_lt(f64x2 a, f64x2 b) -static __inline__ i64x2 wasm_f64x2_lt(f64x2 a, f64x2 b) { - return __extension__(i64x2){a < b}; +// i16x8 wasm_sub_saturate(i16x8 a, i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_sub_saturate(i16x8 a, i16x8 b) { + return (i16x8)__builtin_wasm_sub_saturate_s_i16x8((__i16x8)a, (__i16x8)b); } -// i8x16 wasm_i8x16_le(i8x16 a, i8x16 b) -static __inline__ i8x16 wasm_i8x16_le(i8x16 a, i8x16 b) { - return __extension__(i8x16){a <= b}; +// u16x8 wasm_sub_saturate(u16x8 a, u16x8 b) +static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(u16x8 a, u16x8 b) { + return (u16x8)__builtin_wasm_sub_saturate_u_i16x8((__i16x8)a, (__i16x8)b); } -// i8x16 wasm_i8x16_le(u8x16 a, u8x16 b) -static __inline__ i8x16 wasm_u8x16_le(u8x16 a, u8x16 b) { - return __extension__(u8x16){a <= b}; +// i16x8 wasm_i16x8_mul(i16x8 a 
i16x8 b) +static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_mul(i16x8 a, i16x8 b) { + return a * b; } -// i16x8 wasm_i16x8_le(i16x8 a, i16x8 b) -static __inline__ i16x8 wasm_i16x8_le(i16x8 a, i16x8 b) { - return __extension__(i16x8){a <= b}; +// i32x4 wasm_i32x4_neg(i32x4 a) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_neg(i32x4 a) { + return -a; } -// i16x8 wasm_i16x8_le(u8x16 a, u8x16 b) -static __inline__ i8x16 wasm_u16x8_le(u8x16 a, u8x16 b) { - return __extension__(u8x16){a <= b}; +// bool wasm_i32x4_any_true(i32x4 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_any_true(i32x4 a) { + return __builtin_wasm_any_true_i32x4((__i32x4)a); } -// i32x4 wasm_i32x4_le(i32x4 a, i32x4 b) -static __inline__ i32x4 wasm_i32x4_le(i32x4 a, i32x4 b) { - return __extension__(i32x4){a <= b}; +// bool wasm_i32x4_all_true(i32x4 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(i32x4 a) { + return __builtin_wasm_all_true_i32x4((__i32x4)a); } -// i32x4 wasm_u32x4_le(u32x4 a, u32x4 b) -static __inline__ i32x4 wasm_i32x4_le(u32x4 a, u32x4 b) { - return __extension__(u32x4){a <= b}; +// i32x4 wasm_i32x4_shl(i32x4 a, int32_t b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shl(i32x4 a, int32_t b) { + return a << b; } -// i32x4 wasm_f32x4_le(f32x4 a, f32x4 b) -static __inline__ i32x4 wasm_f32x4_le(f32x4 a, f32x4 b) { - return __extension__(f32x4){a <= b}; +// i32x4 wasm_i32x4_shr(i32x4 a, int32_t b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shr(i32x4 a, int32_t b) { + return a >> b; } -// i64x2 wasm_f64x2_le(f64x2 a, f64x2 b) -static __inline__ i64x2 wasm_f64x2_le(f64x2 a, f64x2 b) { - return __extension__(f64x2){a <= b}; +// u32x4 wasm_u32x4_shr(u32x4 a, int32_t b) +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_shr(u32x4 a, int32_t b) { + return a >> b; } -// i8x16 wasm_i8x16_gt(i8x16 a, i8x16 b) -static __inline__ i8x16 wasm_i8x16_gt(i8x16 a, i8x16 b) { - return __extension__(i8x16){a > b}; +// i32x4 wasm_i32x4_add(i32x4 a i32x4 b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_add(i32x4 a, i32x4 b) { + return a + b; } -// i8x16 wasm_u8x16_gt(u8x16 a, u8x16 b) -static __inline__ i8x16 wasm_u8x16_gt(u8x16 a, u8x16 b) { - return __extension__(u8x16){a > b}; +// i32x4 wasm_i32x4_sub(i32x4 a i32x4 b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_sub(i32x4 a, i32x4 b) { + return a - b; } -// i16x8 wasm_i16x8_gt(i16x8 a, i16x8 b) -static __inline__ i16x8 wasm_i16x8_gt(i16x8 a, i16x8 b) { - return __extension__(i16x8){a > b}; +// i32x4 wasm_i32x4_mul(i32x4 a i32x4 b) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_mul(i32x4 a, i32x4 b) { + return a * b; } -// i16x8 wasm_u16x8_gt(u8x16 a, u8x16 b) -static __inline__ i8x16 wasm_u16x8_gt(u8x16 a, u8x16 b) { - return __extension__(u8x16){a > b}; +// i64x2 wasm_i64x2_neg(i64x2 a) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_neg(i64x2 a) { + return -a; } -// i32x4 wasm_i32x4_gt(i32x4 a, i32x4 b) -static __inline__ i32x4 wasm_i32x4_gt(i32x4 a, i32x4 b) { - return __extension__(i32x4){a > b}; +// bool wasm_i64x2_any_true(i64x2 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_any_true(i64x2 a) { + return __builtin_wasm_any_true_i64x2((__i64x2)a); } -// i32x4 wasm_i32x4_gt(u32x4 a, u32x4 b) -static __inline__ i32x4 wasm_u32x4_gt(u32x4 a, u32x4 b) { - return __extension__(u32x4){a > b}; +// bool wasm_i64x2_all_true(i64x2 a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(i64x2 a) { + return __builtin_wasm_all_true_i64x2((__i64x2)a); } -// f32x4 
wasm_f32x4_gt(f32x4 a, f32x4 b) -static __inline__ i32x4 wasm_f32x4_gt(f32x4 a, f32x4 b) { - return __extension__(f32x4){a > b}; +// i64x2 wasm_i64x2_shl(i64x2 a, int32_t b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shl(i64x2 a, int32_t b) { + return a << b; } -// i64x2 wasm_f64x2_gt(f64x2 a, f64x2 b) -static __inline__ i64x2 wasm_f64x2_gt(f64x2 a, f64x2 b) { - return __extension__(f64x2){a > b}; +// i64x2 wasm_i64x2_shr(i64x2 a, int32_t b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shr(i64x2 a, int32_t b) { + return a >> b; } -// i8x16 wasm_i8x16_ge(i8x16 a, i8x16 b) -static __inline__ i8x16 wasm_i8x16_ge(i8x16 a, i8x16 b) { - return __extension__(i8x16){a >= b}; +// u64x2 wasm_u64x2_shr_u(u64x2 a, int32_t b) +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_u64x2_shr(u64x2 a, int32_t b) { + return a >> b; } -// i8x16 wasm_u8x16_ge(u8x16 a, u8x16 b) -static __inline__ i8x16 wasm_u8x16_ge(u8x16 a, u8x16 b) { - return __extension__(u8x16){a >= b}; +// i8x16 wasm_i64x2_add(i8x16 a i8x16 b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_add(i64x2 a, i64x2 b) { + return a + b; } -// i16x8 wasm_i16x8_ge(i16x8 a, i16x8 b) -static __inline__ i16x8 wasm_i16x8_ge(i16x8 a, i16x8 b) { - return __extension__(i16x8){a >= b}; +// i64x2 wasm_i64x2_sub(i64x2 a i64x2 b) +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_sub(i64x2 a, i64x2 b) { + return a - b; } -// i16x8 wasm_i16x8_ge(u16x8 a, u16x8 b) -static __inline__ u16x8 wasm_u16x8_ge(u16x8 a, u16x8 b) { - return __extension__(u8x16){a >= b}; +// f32x4 wasm_f32x4_abs(f32x4 a) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_abs(f32x4 a) { + return __builtin_wasm_abs_f32x4(a); } -// i32x4 wasm_i32x4_ge(i32x4 a, i32x4 b) -static __inline__ i32x4 wasm_i32x4_ge(i32x4 a, i32x4 b) { - return __extension__(i32x4){a >= b}; +// f32x4 wasm_f32x4_neg(f32x4 a) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_neg(f32x4 a) { + return -a; } -// i32x4 wasm_u32x4_ge(u32x4 a, u32x4 b) -static __inline__ u32x4 wasm_u32x4_ge(u32x4 a, u32x4 b) { - return __extension__(u32x4){a >= b}; +// f32x4 wasm_f32x4_sqrt(f32x4 a) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(f32x4 a) { + return __builtin_wasm_sqrt_f32x4(a); } -// i32x4 wasm_f32x4_ge(f32x4 a, f32x4 b) -static __inline__ i32x4 wasm_f32x4_ge(f32x4 a, f32x4 b) { - return __extension__(f32x4){a >= b}; +// f32x4 wasm_f32x4_add(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_add(f32x4 a, f32x4 b) { + return a + b; } -// i64x2 wasm_f64x2_ge(f64x2 a, f64x2 b) -static __inline__ i64x2 wasm_f64x2_ge(f64x2 a, f64x2 b) { - return __extension__(f32x4){a >= b}; +// f32x4 wasm_f32x4_sub(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_sub(f32x4 a, f32x4 b) { + return a - b; } -// i8x16 wasm_i8x16_abs(i8x16 a) -#define wasm_i8x16_abs(a) (__builtin_wasm_abs_i8x16(a)) +// f32x4 wasm_f32x4_mul(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_mul(f32x4 a, f32x4 b) { + return a * b; +} -// i16x8 wasm_i16x8_abs(i16x8 a) -#define wasm_i16x8_abs(a) (__builtin_wasm_abs_i16x8(a)) +// f32x4 wasm_f32x4_div(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_div(f32x4 a, f32x4 b) { + return a / b; +} -// i32x4 wasm_i32x4_abs(i32x4 a) -#define wasm_i32x4_abs(a) (__builtin_wasm_abs_i32x4(a)) +// f32x4 wasm_f32x4_min(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_min(f32x4 a, f32x4 b) { + return __builtin_wasm_min_f32x4(a, b); +} -// i64x2 wasm_i64x2_abs(i64x2 a) -#define 
wasm_i64x2_abs(a) (__builtin_wasm_abs_i64x2(a)) +// f32x4 wasm_f32x4_max(f32x4 a f32x4 b) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_max(f32x4 a, f32x4 b) { + return __builtin_wasm_max_f32x4(a, b); +} -// f32x4 wasm_f32x4_abs(f32x4 a) -#define wasm_f32x4_abs(a) (__builtin_wasm_abs_f32x4(a)) +#ifdef __wasm_undefined_simd128__ // f64x2 wasm_f64x2_abs(f64x2 a) -#define wasm_f64x2_abs(a) (__builtin_wasm_abs_f64x2(a)) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_abs(f64x2 a) { + return __builtin_wasm_abs_f64x2(a); +} -// f32x4 wasm_convert_f32x4_i32x4(i32x4 a) -#define wasm_convert_f32x4_i32x4(v) (__builtin_convertvector(v, f32x4)) +// f64x2 wasm_f64x2_neg(f64x2 a) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_neg(f64x2 a) { + return -a; +} -// f32x4 wasm_convert_f32x4_u32x4(u32x4 a) -#define wasm_convert_f32x4_u32x4(v) (__builtin_convertvector(v, f32x4)) +// f64x2 wasm_f64x2_sqrt(f64x2 a) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(f64x2 a) { + return __builtin_wasm_sqrt_f64x2(a); +} -// f64x2 wasm_convert_f64x2_i64x2(i64x2 a) -#define wasm_convert_f64x2_i64x2(v) (__builtin_convertvector(v, f64x2)) +// f64x2 wasm_f64x2_add(f64x2 a f64x2 b) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_add(f64x2 a, f64x2 b) { + return a + b; +} -// f64x2 wasm_convert_f64x2_u64x2(u64x2 a) -#define wasm_convert_f64x2_u64x2(v) (__builtin_convertvector(v, f64x2)) +// f64x2 wasm_f64x2_sub(f64x2 a f64x2 b) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_sub(f64x2 a, f64x2 b) { + return a - b; +} -#ifdef __wasm_unimplemented_simd128__ -#endif +// f64x2 wasm_f64x2_mul(f64x2 a f64x2 b) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_mul(f64x2 a, f64x2 b) { + return a * b; +} -// f32x4 wasm_f32x4_min(f32x4 a, f32x4 b) -#define wasm_f32x4_min(a, b) (__builtin_wasm_min_f32x4(a, b)) +// f64x2 wasm_f64x2_div(f64x2 a f64x2 b) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_div(f64x2 a, f64x2 b) { + return a / b; +} -// f32x4 wasm_f32x4_max(f32x4 a, f32x4 b) -#define wasm_f32x4_max(a, b) (__builtin_wasm_max_f32x4(a, b)) +// f64x2 wasm_f64x2_min(f64x2 a f64x2 b) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_min(f64x2 a, f64x2 b) { + return __builtin_wasm_min_f64x2(a, b); +} -// f32x4 wasm_f32x4_sqrt(f32x4 a) -#define wasm_f32x4_sqrt(v) (__builtin_wasm_sqrt_f32x4(v)) +// f64x2 wasm_f64x2_max(f64x2 a f64x2 b) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_max(f64x2 a, f64x2 b) { + return __builtin_wasm_max_f64x2(a, b); +} -#ifdef __wasm_unimplemented_simd128__ +#endif -// f64x2 wasm_f64x2_min(f64x2 a, f64x2 b) -#define wasm_f64x2_min(a, b) (__builtin_wasm_min_f64x2(x,y)) +// // f32x4 wasm_convert_f32x4_i32x4(i32x4 a) +// #define wasm_convert_f32x4_i32x4(v) (__builtin_convertvector(v, f32x4)) -// f64x2 wasm_f64x2_max(f64x2 a, f64x2 b) -#define wasm_f64x2_max(a, b) (__builtin_wasm_max_f64x2(x,y)) +// // f32x4 wasm_convert_f32x4_u32x4(u32x4 a) +// #define wasm_convert_f32x4_u32x4(v) (__builtin_convertvector(v, f32x4)) -// f64x2 wasm_f64x2_sqrt(f64x2 a) -#define wasm_f64x2_sqrt(v) (__builtin_wasm_sqrt_f64x2(v)) +// // f64x2 wasm_convert_f64x2_i64x2(i64x2 a) +// #define wasm_convert_f64x2_i64x2(v) (__builtin_convertvector(v, f64x2)) -#endif +// // f64x2 wasm_convert_f64x2_u64x2(u64x2 a) +// #define wasm_convert_f64x2_u64x2(v) (__builtin_convertvector(v, f64x2)) // not sure how this should work with variable input // #define wasm_i8x16_shuffle(a, b) \ diff --git a/tests/test_core.py b/tests/test_core.py index e73daca33e233..f2d861495a7e0 
100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5451,6 +5451,7 @@ def test_wasm_builtin_simd(self, js_engines): @wasm_simd def test_wasm_intrinsics_simd(self, js_engines): + self.emcc_args.append('-Wpedantic') self.do_run(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), 'Success!', js_engines=js_engines) self.emcc_args.append('-munimplemented-simd128') diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index 86c119f2f641c..659005743b3d4 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -1,1345 +1,1217 @@ -#include -#include -#include -#include -#include - -#define TESTFN EMSCRIPTEN_KEEPALIVE __attribute__((noinline)) - -i8x16 TESTFN i8x16_load(i8x16 *ptr) { - return (i8x16) wasm_v128_load(ptr); -// return *ptr; -} -void TESTFN i8x16_store(i8x16 *ptr, i8x16 vec) { - i8x16_store(ptr, vec); -// *ptr = vec; -} -i32x4 TESTFN i32x4_const(void) { - return (i32x4) = wasm_v128_const(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); -// return (i32x4) {1, 2, 3, 4}; -} -//todo: -i8x16 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) { - return __builtin_shufflevector(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); -} -//todo: -i32x4 TESTFN i32x4_shuffle_reverse(i32x4 vec) { - return __builtin_shufflevector(vec, vec, 3, 2, 1, 0); -} -i8x16 TESTFN i8x16_splat(int32_t x) { - return wasm_i8x16_splat(x); -// return (i8x16) {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x}; -} -int32_t TESTFN i8x16_extract_lane_s_first(i8x16 vec) { - return wasm_i8x16_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_s_i8x16(vec, 0); -} -int32_t TESTFN i8x16_extract_lane_s_last(i8x16 vec) { - return wasm_i8x16_extract_lane(vec, 15); -// return __builtin_wasm_extract_lane_s_i8x16(vec, 15); -} -#ifdef __wasm_unimplemented_simd128__ -uint32_t TESTFN i8x16_extract_lane_u_first(i8x16 vec) { - return wasm_u8x16_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_u_i8x16(vec, 0); -} -uint32_t TESTFN i8x16_extract_lane_u_last(i8x16 vec) { - return wasm_u8x16_extract_lane(vec, 15); -// return __builtin_wasm_extract_lane_u_i8x16(vec, 15); -} -#endif // __wasm_unimplemented_simd128__ -i8x16 TESTFN i8x16_replace_lane_first(i8x16 vec, int32_t val) { - return wasm_i8x16_replace_lane(vec, 0, val); -// return (i8x16){__builtin_wasm_replace_lane_i8x16(vec, 0, val)}; -} -i8x16 TESTFN i8x16_replace_lane_last(i8x16 vec, int32_t val) { - return wasm_i8x16_replace_lane(vec, 15, val); -// return (i8x16){__builtin_wasm_replace_lane_i8x16(vec, 15, val)}; -} -i16x8 TESTFN i16x8_splat(int32_t x) { - return wasm_i16x8_splat(x); -// return (i16x8) {x, x, x, x, x, x, x, x}; -} -int32_t TESTFN i16x8_extract_lane_s_first(i16x8 vec) { - return wasm_i16x8_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_s_i16x8(vec, 0); -} -int32_t TESTFN i16x8_extract_lane_s_last(i16x8 vec) { - return wasm_i16x8_extract_lane(vec, 7); -// return __builtin_wasm_extract_lane_s_i16x8(vec, 7); -} -#ifdef __wasm_unimplemented_simd128__ -int32_t TESTFN i16x8_extract_lane_u_first(i16x8 vec) { - return wasm_u16x8_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_u_i16x8(vec, 0); -} -int32_t TESTFN i16x8_extract_lane_u_last(i16x8 vec) { - return wasm_u16x8_extract_lane(vec, 7); -// return __builtin_wasm_extract_lane_u_i16x8(vec, 7); -} -#endif // __wasm_unimplemented_simd128__ -i16x8 TESTFN i16x8_replace_lane_first(i16x8 vec, int32_t val) { - return wasm_i16x8_replace_lane(vec, 0, val); -// return 
__builtin_wasm_replace_lane_i16x8(vec, 0, val); -} -i16x8 TESTFN i16x8_replace_lane_last(i16x8 vec, int32_t val) { - return wasm_i16x8_replace_lane(vec, 7, val); -// return __builtin_wasm_replace_lane_i16x8(vec, 7, val); -} -i32x4 TESTFN i32x4_splat(int32_t x) { - return wasm_i32x4_splat(x); -// return (i32x4) {x, x, x, x}; -} -int32_t TESTFN i32x4_extract_lane_first(i32x4 vec) { - return wasm_i32x4_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_i32x4(vec, 0); -} -int32_t TESTFN i32x4_extract_lane_last(i32x4 vec) { - return wasm_i32x4_extract_lane(vec, 3); -// return __builtin_wasm_extract_lane_i32x4(vec, 3); -} -i32x4 TESTFN i32x4_replace_lane_first(i32x4 vec, int32_t val) { - return wasm_i32x4_replace_lane(vec, 0, val); -// return __builtin_wasm_replace_lane_i32x4(vec, 0, val); -} -i32x4 TESTFN i32x4_replace_lane_last(i32x4 vec, int32_t val) { - return wasm_i32x4_replace_lane(vec, 3, val); -// return __builtin_wasm_replace_lane_i32x4(vec, 3, val); -} -i64x2 TESTFN i64x2_splat(int64_t x) { - return wasm_i64x2_splat(x); -// return (i64x2) {x, x}; -} -#ifdef __wasm_unimplemented_simd128__ -int64_t TESTFN i64x2_extract_lane_first(i64x2 vec) { - return wasm_i64x2_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_i64x2(vec, 0); -} -int64_t TESTFN i64x2_extract_lane_last(i64x2 vec) { - return wasm_i64x2_extract_lane(vec, 1); -// return __builtin_wasm_extract_lane_i64x2(vec, 1); -} -i64x2 TESTFN i64x2_replace_lane_first(i64x2 vec, int64_t val) { - return wasm_i64x2_replace_lane(vec, 0, val); -// return __builtin_wasm_replace_lane_i64x2(vec, 0, val); -} -i64x2 TESTFN i64x2_replace_lane_last(i64x2 vec, int64_t val) { - return wasm_i64x2_replace_lane(vec, 1, val); -// return __builtin_wasm_replace_lane_i64x2(vec, 1, val); -} -#endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_splat(float x) { - return wasm_f32x4_splat(x); -// return (f32x4) {x, x, x, x}; -} -float TESTFN f32x4_extract_lane_first(f32x4 vec) { - return wasm_f32x4_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_f32x4(vec, 0); -} -float TESTFN f32x4_extract_lane_last(f32x4 vec) { - return wasm_f32x4_extract_lane(vec, 3); -// return __builtin_wasm_extract_lane_f32x4(vec, 3); -} -f32x4 TESTFN f32x4_replace_lane_first(f32x4 vec, float val) { - return wasm_f32x4_replace_lane(vec, 0, val); -// return __builtin_wasm_replace_lane_f32x4(vec, 0, val); -} -f32x4 TESTFN f32x4_replace_lane_last(f32x4 vec, float val) { - return wasm_f32x4_replace_lane(vec, 3, val); -// return __builtin_wasm_replace_lane_f32x4(vec, 3, val); -} -f64x2 TESTFN f64x2_splat(int64_t x) { - return wasm_f64x2_splat((double ) x); -// return (f64x2) {x, x}; -} -#ifdef __wasm_unimplemented_simd128__ -double TESTFN f64x2_extract_lane_first(f64x2 vec) { - return wasm_f64x2_extract_lane(vec, 0); -// return __builtin_wasm_extract_lane_f64x2(vec, 0); -} -double TESTFN f64x2_extract_lane_last(f64x2 vec) { - return wasm_f64x2_extract_lane(vec, 1); -// return __builtin_wasm_extract_lane_f64x2(vec, 1); -} -f64x2 TESTFN f64x2_replace_lane_first(f64x2 vec, double val) { - return wasm_f64x2_replace_lane(vec, 0, val); -// return __builtin_wasm_replace_lane_f64x2(vec, 0, val); -} -f64x2 TESTFN f64x2_replace_lane_last(f64x2 vec, double val) { - return wasm_f64x2_replace_lane(vec, 1, val); -// return __builtin_wasm_replace_lane_f64x2(vec, 1, val); -} -#endif // __wasm_unimplemented_simd128__ -i8x16 TESTFN i8x16_eq(i8x16 x, i8x16 y) { - return wasm_i8x16_eq(x, y); -// return x == y; -} -i8x16 TESTFN i8x16_ne(i8x16 x, i8x16 y) { - return 
wasm_i8x16_ne(x, y); -// return x != y; -} -i8x16 TESTFN i8x16_lt_s(i8x16 x, i8x16 y) { - return wasm_i8x16_lt(x, y); -// return x < y; -} -i8x16 TESTFN i8x16_lt_u(u8x16 x, u8x16 y) { - return wasm_u8x16_lt(x, y); -// return (u8x16)x < (u8x16)y; -} -i8x16 TESTFN i8x16_gt_s(i8x16 x, i8x16 y) { - return wasm_i8x16_gt(x, y); -// return x > y; -} -i8x16 TESTFN u8x16_gt_u(i8x16 x, i8x16 y) { - return wasm_u8x16_gt(x,y); -// return (u8x16)x > (u8x16)y; -} -i8x16 TESTFN i8x16_le_s(i8x16 x, i8x16 y) { - return wasm_i8x16_le(x,y); -// return x <= y; -} -i8x16 TESTFN u8x16_le_u(u8x16 x, u8x16 y) { - return wasm_u8x16_le(x, y); -// return (u8x16)x <= (u8x16)y; -} -i8x16 TESTFN i8x16_ge_s(i8x16 x, i8x16 y) { - return wasm_i8x16_ge(x, y); -// return x >= y; -} -i8x16 TESTFN i8x16_ge_u(u8x16 x, u8x16 y) { - return wasm_u8x16_ge(x, y); -// return (u8x16)x >= (u8x16)y; -} -i16x8 TESTFN i16x8_eq(i16x8 x, i16x8 y) { - return wasm_i16x8_eq(x,y); -// return x == y; -} -i16x8 TESTFN i16x8_ne(i16x8 x, i16x8 y) { - return wasm_i16x8_ne(x,y); -// return x != y; -} -i16x8 TESTFN i16x8_lt_s(i16x8 x, i16x8 y) { - return wasm_i16x8_lt(x,y); -// return x < y; -} -i16x8 TESTFN i16x8_lt_u(u16x8 x, u16x8 y) { - return wasm_u16x8_lt(x,y); -// return (u16x8)x < (u16x8)y; -} -i16x8 TESTFN i16x8_gt_s(i16x8 x, i16x8 y) { - return wasm_i16x8_gt(x,y); -// return x > y; -} -i16x8 TESTFN i16x8_gt_u(u16x8 x, u16x8 y) { - return wasm_u16x8_gt(x,y); -// return (u16x8)x > (u16x8)y; -} -i16x8 TESTFN i16x8_le_s(i16x8 x, i16x8 y) { - return wasm_i16x8_le(x, y); -// return x <= y; -} -i16x8 TESTFN i16x8_le_u(u16x8 x, u16x8 y) { - return wasm_u16x8_le(x, y); -// return (u16x8)x <= (u16x8)y; -} -i16x8 TESTFN i16x8_ge_s(i16x8 x, i16x8 y) { - return wasm_i16x8_ge(x, y); -// return x >= y; -} -i16x8 TESTFN i16x8_ge_u(u16x8 x, u16x8 y) { - return wasm_u16x8_ge(x, y); -// return (u16x8)x >= (u16x8)y; -} -i32x4 TESTFN i32x4_eq(i32x4 x, i32x4 y) { - return wasm_i32x4_eq(x, y); -// return (i32x4)(x == y); -} -i32x4 TESTFN i32x4_ne(i32x4 x, i32x4 y) { - return wasm_i32x4_ne(x, y); -// return (i32x4)(x != y); -} -i32x4 TESTFN i32x4_lt_s(i32x4 x, i32x4 y) { - return wasm_i32x4_lt(x, y); -// return (i32x4)(x < y); -} -i32x4 TESTFN i32x4_lt_u(u32x4 x, u32x4 y) { - return wasm_u32x4_lt(x, y); -// return (i32x4)((u32x4)x < (u32x4)y); -} -i32x4 TESTFN i32x4_gt_s(i32x4 x, i32x4 y) { - return wasm_i32x4_gt(x, y); -// return (i32x4)(x > y); -} -i32x4 TESTFN i32x4_gt_u(u32x4 x, u32x4 y) { - return wasm_u32x4_gt(x, y); -// return (i32x4)((u32x4)x > (u32x4)y); -} -i32x4 TESTFN i32x4_le_s(i32x4 x, i32x4 y) { - return wasm_i32x4_le(x, y); -// return (i32x4)(x <= y); -} -i32x4 TESTFN i32x4_le_u(u32x4 x, u32x4 y) { - return wasm_u32x4_le(x, y); -// return (i32x4)((u32x4)x <= (u32x4)y); -} -i32x4 TESTFN i32x4_ge_s(i32x4 x, i32x4 y) { - return wasm_i32x4_ge(x, y); -// return (i32x4)(x >= y); -} -i32x4 TESTFN i32x4_ge_u(i32x4 x, i32x4 y) { - return wasm_u32x4_ge(x, y); -// return (i32x4)((u32x4)x >= (u32x4)y); -} -i32x4 TESTFN f32x4_eq(f32x4 x, f32x4 y) { - return wasm_f32x4_eq(x,y); -// return (f32x4)(x == y); -} -i32x4 TESTFN f32x4_ne(f32x4 x, f32x4 y) { - return wasm_f32x4_ne(x, y); -// return (i32x4)(x != y); -} -i32x4 TESTFN f32x4_lt(f32x4 x, f32x4 y) { - return wasm_f32x4_lt(x, y); -// return (i32x4)(x < y); -} -i32x4 TESTFN f32x4_gt(f32x4 x, f32x4 y) { - return wasm_f32x4_gt(x,y); -// return (i32x4)(x > y); -} -i32x4 TESTFN f32x4_le(f32x4 x, f32x4 y) { - return wasm_f32x4_le(x, y); -// return (i32x4)(x <= y); -} -i32x4 TESTFN f32x4_ge(f32x4 x, f32x4 y) { - 
return wasm_f32x4_ge(x, y); -// return (i32x4)(x >= y); -} -i64x2 TESTFN f64x2_eq(f64x2 x, f64x2 y) { - return wasm_f64x2_eq(x,y); -// return (i64x2)(x == y); -} -i64x2 TESTFN f64x2_ne(f64x2 x, f64x2 y) { - return wasm_f64x2_ne(x,y); -// return (i64x2)(x != y); -} -i64x2 TESTFN f64x2_lt(f64x2 x, f64x2 y) { - return wasm_f64x2_lt(x,y); -// return (i64x2)(x < y); -} -i64x2 TESTFN f64x2_gt(f64x2 x, f64x2 y) { - return wasm_f64x2_gt(x, y); -// return (i64x2)(x > y); -} -i64x2 TESTFN f64x2_le(f64x2 x, f64x2 y) { - return wasm_f64x2_le(x, y); -// return (i64x2)(x <= y); -} -i64x2 TESTFN f64x2_ge(f64x2 x, f64x2 y) { - return wasm_f64x2_ge(x, y); -// return (i64x2)(x >= y); -} -i8x16 TESTFN i8x16_not(i8x16 vec) { - return wasm_i8x16_not(vec); -// return ~vec; -} -i8x16 TESTFN i8x16_and(i8x16 x, i8x16 y) { - return wasm_i8x16_and(x, y); -// return x & y; -} -i8x16 TESTFN i8x16_or(i8x16 x, i8x16 y) { - return wasm_i8x16_or(x,y); -// return x | y; -} -i8x16 TESTFN i8x16_xor(i8x16 x, i8x16 y) { - return wasm_i8x16_xor(x,y); -// return x ^ y; -} -i8x16 TESTFN i8x16_bitselect(i8x16 x, i8x16 y, i8x16 cond) { - return wasm_i8x16_bitselect(x,y,cond); -// return (i8x16)__builtin_wasm_bitselect((i32x4)x, (i32x4)y, (i32x4)cond); -} -i8x16 TESTFN i8x16_neg(i8x16 vec) { - return wasm_i8x16_neg(vec); -// return -vec; -} -int32_t TESTFN i8x16_any_true(i8x16 vec) { - return wasm_i8x16_any_true(vec); -// return __builtin_wasm_any_true_i8x16(vec); -} -int32_t TESTFN i8x16_all_true(i8x16 vec) { - return wasm_i8x16_all_true(vec); -// return __builtin_wasm_all_true_i8x16(vec); -} -i8x16 TESTFN i8x16_shl(i8x16 vec, int32_t shift) { - return wasm_i8x16_shl(vec, shift); -// return vec << shift; -} -i8x16 TESTFN i8x16_shr_s(i8x16 vec, int32_t shift) { - return wasm_i8x16_shr(vec, shift); -// return vec >> shift; -} -u8x16 TESTFN i8x16_shr_u(u8x16 vec, int32_t shift) { - return wasm_u8x16_shr(vec, shift); -// return (i8x16)((u8x16)vec >> shift); -} -i8x16 TESTFN i8x16_add(i8x16 x, i8x16 y) { - return wasm_i8x16_add(x,y); -// return x + y; -} -i8x16 TESTFN i8x16_add_saturate_s(i8x16 x, i8x16 y) { - return wasm_i8x16_add_saturate(x, y); -// return __builtin_wasm_add_saturate_s_i8x16(x, y); -} -u8x16 TESTFN i8x16_add_saturate_u(u8x16 x, u8x16 y) { - return wasm_u8x16_add_saturate(x, y); -// return __builtin_wasm_add_saturate_u_i8x16(x, y); -} -i8x16 TESTFN i8x16_sub(i8x16 x, i8x16 y) { - return wasm_i8x16_sub(x,y); -// return x - y; -} -i8x16 TESTFN i8x16_sub_saturate_s(i8x16 x, i8x16 y) { - return wasm_i8x16_sub_saturate(x, y); -// return __builtin_wasm_sub_saturate_s_i8x16(x, y); -} -u8x16 TESTFN i8x16_sub_saturate_u(u8x16 x, u8x16 y) { - return wasm_u8x16_sub_saturate(x, y); -// return __builtin_wasm_sub_saturate_u_i8x16(x, y); -} -i8x16 TESTFN i8x16_mul(i8x16 x, i8x16 y) { - return wasm_i8x16_mul(x, y); -// return x * y; -} -i16x8 TESTFN i16x8_neg(i16x8 vec) { - return wasm_i16x8_neg(vec); -// return -vec; -} -bool TESTFN i16x8_any_true(i16x8 vec) { - return wasm_i16x8_any_true(vec); -// return __builtin_wasm_any_true_i16x8(vec); -} -int32_t TESTFN i16x8_all_true(i16x8 vec) { - return wasm_i16x8_all_true(vec); - //return __builtin_wasm_all_true_i16x8(vec); -} -i16x8 TESTFN i16x8_shl(i16x8 vec, int32_t shift) { - return wasm_i16x8_shl(vec, shift); -// return vec << shift; -} -i16x8 TESTFN i16x8_shr_s(i16x8 vec, int32_t shift) { - return wasm_i16x8_shr(vec, shift); -// return vec >> shift; -} -u16x8 TESTFN i16x8_shr_u(u16x8 vec, int32_t shift) { - return wasm_u16x8_shr(vec, shift); -// return (i16x8)((u16x8)vec >> shift); -} 
-i16x8 TESTFN i16x8_add(i16x8 x, i16x8 y) { - return wasm_i16x8_add(x, y); -// return x + y; -} -i16x8 TESTFN i16x8_add_saturate_s(i16x8 x, i16x8 y) { - return wasm_i16x8_add_saturate(x, y); -// return __builtin_wasm_add_saturate_s_i16x8(x, y); -} -u16x8 TESTFN i16x8_add_saturate_u(u16x8 x, u16x8 y) { - return wasm_u16x8_add_saturate(x, y); -// return __builtin_wasm_add_saturate_u_i16x8(x, y); -} -i16x8 TESTFN i16x8_sub(i16x8 x, i16x8 y) { - return wasm_i16x8_sub(x, y); -// return x - y; -} -i16x8 TESTFN i16x8_sub_saturate_s(i16x8 x, i16x8 y) { - return wasm_i16x8_sub_saturate(x,y); -// return __builtin_wasm_sub_saturate_s_i16x8(x, y); -} -u16x8 TESTFN i16x8_sub_saturate_u(u16x8 x, u16x8 y) { - return wasm_u16x8_sub_saturate(x, y); -// return __builtin_wasm_sub_saturate_u_i16x8(x, y); -} -i16x8 TESTFN i16x8_mul(i16x8 x, i16x8 y) { - return wasm_i16x8_mul(x, y); -// return x * y; -} -i32x4 TESTFN i32x4_neg(i32x4 vec) { - return wasm_i32x4_neg(vec); -// return -vec; -} -int32_t TESTFN i32x4_any_true(i32x4 vec) { - return wasm_i32x4_any_true(vec); -// return __builtin_wasm_any_true_i32x4(vec); -} -int32_t TESTFN i32x4_all_true(i32x4 vec) { - return wasm_i32x4_all_true(vec); -// return __builtin_wasm_all_true_i32x4(vec); -} -i32x4 TESTFN i32x4_shl(i32x4 vec, int32_t shift) { - return wasm_i32x4_shl(vec, shift); -// return vec << shift; -} -i32x4 TESTFN i32x4_shr_s(i32x4 vec, int32_t shift) { - return wasm_i32x4_shr(vec, shift); -// return vec >> shift; -} -u32x4 TESTFN i32x4_shr_u(u32x4 vec, int32_t shift) { - return wasm_u32x4_shr(vec, shift); -// return (i32x4)((u32x4)vec >> shift); -} -i32x4 TESTFN i32x4_add(i32x4 x, i32x4 y) { - return wasm_i32x4_add(x, y); -// return x + y; -} -i32x4 TESTFN i32x4_sub(i32x4 x, i32x4 y) { - return wasm_i32x4_sub(x, y); -// return x - y; -} -i32x4 TESTFN i32x4_mul(i32x4 x, i32x4 y) { - return wasm_i32x4_mul(x, y); -// return x * y; -} -i64x2 TESTFN i64x2_neg(i64x2 vec) { - return wasm_i64x2_neg(vec); -// return -vec; -} -#ifdef __wasm_unimplemented_simd128__ -bool TESTFN i64x2_any_true(i64x2 vec) { - return wasm_i64x2_any_true(vec); -// return __builtin_wasm_any_true_i64x2(vec); -} -bool TESTFN i64x2_all_true(i64x2 vec) { - return wasm_i64x2_all_true(vec); -// return __builtin_wasm_all_true_i64x2(vec); -} -#endif // __wasm_unimplemented_simd128__ -i64x2 TESTFN i64x2_shl(i64x2 vec, int32_t shift) { - return wasm_i64x2_shl(vec, shift); -// return vec << shift; -} - -i64x2 TESTFN i64x2_shr_s(i64x2 vec, int32_t shift) { - return wasm_i64x2_shr(vec, shift); -// return vec >> shift; -} -u64x2 TESTFN i64x2_shr_u(u64x2 vec, int32_t shift) { - return wasm_u64x2_shr(vec, shift); -// return (i64x2)((u64x2)vec >> shift); -} -i64x2 TESTFN i64x2_add(i64x2 x, i64x2 y) { - return wasm_i64x2_add(x, y); -// return x + y; -} -i64x2 TESTFN i64x2_sub(i64x2 x, i64x2 y) { - return wasm_i64x2_sub(x, y); -// return x - y; -} -f32x4 TESTFN f32x4_abs(f32x4 vec) { - return wasm_f32x4_abs(vec); -// return __builtin_wasm_abs_f32x4(vec); -} -f32x4 TESTFN f32x4_neg(f32x4 vec) { - return wasm_f32x4_neg(vec); -// return -vec; -} -#ifdef __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_sqrt(f32x4 vec) { - return wasm_f32x4_sqrt(vec); -// return __builtin_wasm_sqrt_f32x4(vec); -} -#endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_add(f32x4 x, f32x4 y) { - return wasm_f32x4_add(x,y); -// return x + y; -} -f32x4 TESTFN f32x4_sub(f32x4 x, f32x4 y) { - return wasm_f32x4_sub(x, y); -// return x - y; -} -f32x4 TESTFN f32x4_mul(f32x4 x, f32x4 y) { - return wasm_f32x4_mul(x, y); -// 
return x * y; -} -f32x4 TESTFN f32x4_div(f32x4 x, f32x4 y) { - return wasm_f32x4_div(x, y); -// return x / y; -} -f32x4 TESTFN f32x4_min(f32x4 x, f32x4 y) { - return wasm_f32x4_min(f32x4) -// return __builtin_wasm_min_f32x4(x, y); -} -f32x4 TESTFN f32x4_max(f32x4 x, f32x4 y) { - return wasm_f32x4_max(x, y); -// return __builtin_wasm_max_f32x4(x, y); -} -#ifdef __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_abs(f64x2 vec) { - return __builtin_wasm_abs_f64x2(vec); -} -#endif // __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_neg(f64x2 vec) { - return -vec; -} -#ifdef __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_sqrt(f64x2 vec) { - return __builtin_wasm_sqrt_f64x2(vec); -} -#endif // __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_add(f64x2 x, f64x2 y) { - return x + y; -} -f64x2 TESTFN f64x2_sub(f64x2 x, f64x2 y) { - return x - y; -} -f64x2 TESTFN f64x2_mul(f64x2 x, f64x2 y) { - return x * y; -} -f64x2 TESTFN f64x2_div(f64x2 x, f64x2 y) { - return x / y; -} -#ifdef __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_min(f64x2 x, f64x2 y) { - return __builtin_wasm_min_f64x2(x, y); -} -f64x2 TESTFN f64x2_max(f64x2 x, f64x2 y) { - return __builtin_wasm_max_f64x2(x, y); -} -#endif // __wasm_unimplemented_simd128__ -i32x4 TESTFN i32x4_trunc_s_f32x4_sat(f32x4 vec) { - return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(vec); -} -i32x4 TESTFN i32x4_trunc_u_f32x4_sat(f32x4 vec) { - return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(vec); -} -#ifdef __wasm_unimplemented_simd128__ -i64x2 TESTFN i64x2_trunc_s_f64x2_sat(f64x2 vec) { - return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(vec); -} -i64x2 TESTFN i64x2_trunc_u_f64x2_sat(f64x2 vec) { - return __builtin_wasm_trunc_saturate_u_i64x2_f64x2(vec); -} -#endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_convert_s_i32x4(i32x4 vec) { - return __builtin_convertvector(vec, f32x4); -} -f32x4 TESTFN f32x4_convert_u_i32x4(i32x4 vec) { - return __builtin_convertvector((u32x4)vec, f32x4); -} -f64x2 TESTFN f64x2_convert_s_i64x2(i64x2 vec) { - return __builtin_convertvector(vec, f64x2); -} -f64x2 TESTFN f64x2_convert_u_i64x2(i64x2 vec) { - return __builtin_convertvector((u64x2)vec, f64x2); -} - -static int failures = 0; - -#define formatter(x) _Generic((x), \ - char: "%d", \ - unsigned char: "%d", \ - short: "%d", \ - int64_t: "%ld", \ - int32_t: "%d", \ - uint32_t: "%d", \ - float: "%f", \ - double: "%f" \ - ) - -#define err(x) fprintf(stderr, formatter(x), x) - -#define eq(a, b) ({ \ - bool anan = _Generic((a), \ - float: isnan(a), \ - double: isnan(a), \ - default: false); \ - bool bnan = _Generic((b), \ - float: isnan(b), \ - double: isnan(b), \ - default: false); \ - ((anan && bnan) || (!anan && a == b)); \ - }) - -#define expect_eq(_a, _b) ({ \ - __typeof__(_a) a = (_a), b = (_b); \ - if (!eq(a, b)) { \ - failures++; \ - fprintf(stderr, "line %d: expected ", __LINE__); \ - err(b); \ - fprintf(stderr, ", got "); \ - err(a); \ - fprintf(stderr, "\n"); \ - } \ - }) - -#define expect_vec(_a, _b) ({ \ - __typeof__(_a) a = (_a), b = (_b); \ - bool err = false; \ - size_t lanes = _Generic((a), \ - u8x16: 16, \ - i8x16: 16, \ - i16x8: 8, \ - i32x4: 4, \ - i64x2: 2, \ - f32x4: 4, \ - f64x2: 2); \ - for (size_t i = 0; i < lanes; i++) { \ - if (!eq(a[i], b[i])) { \ - err = true; \ - break; \ - } \ - } \ - if (err) { \ - failures++; \ - fprintf(stderr, "line %d: expected {", __LINE__); \ - for (size_t i = 0; i < lanes - 1; i++) { \ - err(b[i]); \ - fprintf(stderr, ", "); \ - } \ - err(b[lanes - 1]); \ - fprintf(stderr, "}, got {"); \ - for 
(size_t i = 0; i < lanes - 1; i++) { \ - err(a[i]); \ - fprintf(stderr, ", "); \ - } \ - err(a[lanes - 1]); \ - fprintf(stderr, "}\n"); \ - } \ - }) - -int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { - { - i8x16 vec = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; - expect_vec(i8x16_load(&vec), - ((i8x16){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})); - i8x16_store(&vec, (i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}); - expect_vec(i8x16_load(&vec), - ((i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7})); - } - expect_vec(i32x4_const(), ((i32x4){1, 2, 3, 4})); - expect_vec( - i8x16_shuffle_interleave_bytes( - (i8x16){1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0}, - (i8x16){0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16} - ), - ((i8x16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) - ); - expect_vec(i32x4_shuffle_reverse((i32x4){1, 2, 3, 4}), ((i32x4){4, 3, 2, 1})); - - // i8x16 lane accesses - expect_vec(i8x16_splat(5), ((i8x16){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5})); - expect_vec(i8x16_splat(257), ((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); - expect_eq(i8x16_extract_lane_s_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), -1); - expect_eq(i8x16_extract_lane_s_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), -1); -#ifdef __wasm_unimplemented_simd128__ - expect_eq(i8x16_extract_lane_u_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 255); - expect_eq(i8x16_extract_lane_u_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), 255); -#endif // __wasm_unimplemented_simd128__ - expect_vec( - i8x16_replace_lane_first((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), - ((i8x16){7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) - ); - expect_vec( - i8x16_replace_lane_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), - ((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}) - ); - - // i16x8 lane accesses - expect_vec(i16x8_splat(5), ((i16x8){5, 5, 5, 5, 5, 5, 5, 5})); - expect_vec(i16x8_splat(65537), ((i16x8){1, 1, 1, 1, 1, 1, 1, 1})); - expect_eq(i16x8_extract_lane_s_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), -1); - expect_eq(i16x8_extract_lane_s_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), -1); -#ifdef __wasm_unimplemented_simd128__ - expect_eq(i16x8_extract_lane_u_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), 65535); - expect_eq(i16x8_extract_lane_u_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), 65535); -#endif // __wasm_unimplemented_simd128__ - expect_vec(i16x8_replace_lane_first((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){7, 0, 0, 0, 0, 0, 0, 0})); - expect_vec(i16x8_replace_lane_last((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){0, 0, 0, 0, 0, 0, 0, 7})); - - // i32x4 lane accesses - expect_vec(i32x4_splat(-5), ((i32x4){-5, -5, -5, -5})); - expect_eq(i32x4_extract_lane_first((i32x4){-5, 0, 0, 0}), -5); - expect_eq(i32x4_extract_lane_last((i32x4){0, 0, 0, -5}), -5); - expect_vec(i32x4_replace_lane_first((i32x4){0, 0, 0, 0}, 53), ((i32x4){53, 0, 0, 0})); - expect_vec(i32x4_replace_lane_last((i32x4){0, 0, 0, 0}, 53), ((i32x4){0, 0, 0, 53})); - - // i64x2 lane accesses - expect_vec(i64x2_splat(-5), ((i64x2){-5, -5})); -#ifdef __wasm_unimplemented_simd128__ - expect_eq(i64x2_extract_lane_first((i64x2){-5, 0}), -5); - expect_eq(i64x2_extract_lane_last((i64x2){0, -5}), -5); - expect_vec(i64x2_replace_lane_first((i64x2){0, 0}, 53), ((i64x2){53, 0})); - expect_vec(i64x2_replace_lane_last((i64x2){0, 0}, 53), ((i64x2){0, 53})); -#endif // 
__wasm_unimplemented_simd128__ - - // f32x4 lane accesses - expect_vec(f32x4_splat(-5), ((f32x4){-5, -5, -5, -5})); - expect_eq(f32x4_extract_lane_first((f32x4){-5, 0, 0, 0}), -5); - expect_eq(f32x4_extract_lane_last((f32x4){0, 0, 0, -5}), -5); - expect_vec(f32x4_replace_lane_first((f32x4){0, 0, 0, 0}, 53), ((f32x4){53, 0, 0, 0})); - expect_vec(f32x4_replace_lane_last((f32x4){0, 0, 0, 0}, 53), ((f32x4){0, 0, 0, 53})); - - // f64x2 lane accesses - expect_vec(f64x2_splat(-5), ((f64x2){-5, -5})); -#ifdef __wasm_unimplemented_simd128__ - expect_eq(f64x2_extract_lane_first((f64x2){-5, 0}), -5); - expect_eq(f64x2_extract_lane_last((f64x2){0, -5}), -5); - expect_vec(f64x2_replace_lane_first((f64x2){0, 0}, 53), ((f64x2){53, 0})); - expect_vec(f64x2_replace_lane_last((f64x2){0, 0}, 53), ((f64x2){0, 53})); -#endif // __wasm_unimplemented_simd128__ - - // i8x16 comparisons - expect_vec( - i8x16_eq( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){-1, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0}) - ); - expect_vec( - i8x16_ne( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1}) - ); - expect_vec( - i8x16_lt_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){0, 0, 0, -1, 0, -1, -1, 0, 0, 0, -1, 0, 0, -1, -1, 0}) - ); - expect_vec( - i8x16_lt_u( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1}) - ); - expect_vec( - i8x16_gt_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1}) - ); - expect_vec( - i8x16_gt_u( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0}) - ); - expect_vec( - i8x16_le_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){-1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, 0}) - ); - // bugs.chromium.org/p/v8/issues/detail?id=8635 - // expect_vec( - // i8x16_le_u( - // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - // ), - // ((i8x16){-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1}) - // ); - expect_vec( - i8x16_ge_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - ), - ((i8x16){-1, -1, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, 0, -1}) - ); - // expect_vec( - // i8x16_ge_u( - // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} - // ), - // ((i8x16){-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 
0, -1, 0}) - // ); - - // i16x8 comparisons - expect_vec( - i16x8_eq( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}) - ); - expect_vec( - i16x8_ne( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){0, -1, -1, -1, -1, -1, -1, -1}) - ); - expect_vec( - i16x8_lt_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){0, 0, 0, -1, 0, -1, 0, -1}) - ); - expect_vec( - i16x8_lt_u( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){0, 0, 0, 0, -1, 0, -1, 0}) - ); - expect_vec( - i16x8_gt_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){0, -1, -1, 0, -1, 0, -1, 0}) - ); - expect_vec( - i16x8_gt_u( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){0, -1, -1, -1, 0, -1, 0, -1}) - ); - expect_vec( - i16x8_le_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){-1, 0, 0, -1, 0, -1, 0, -1}) - ); - expect_vec( - i16x8_le_u( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){-1, 0, 0, 0, -1, 0, -1, 0}) - ); - expect_vec( - i16x8_ge_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){-1, -1, -1, 0, -1, 0, -1, 0}) - ); - expect_vec( - i16x8_ge_u( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} - ), - ((i16x8){-1, -1, -1, -1, 0, -1, 0, -1}) - ); - - // i342x4 comparisons - expect_vec( - i32x4_eq((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, 0, 0, 0}) - ); - expect_vec( - i32x4_ne((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, -1, -1, -1}) - ); - expect_vec( - i32x4_lt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, -1, 0, -1}) - ); - expect_vec( - i32x4_lt_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, 0, -1, -1}) - ); - expect_vec( - i32x4_gt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, 0, -1, 0}) - ); - expect_vec( - i32x4_gt_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){0, -1, 0, 0}) - ); - expect_vec( - i32x4_le_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, -1, 0, -1}) - ); - expect_vec( - i32x4_le_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, 0, -1, -1}) - ); - expect_vec( - i32x4_ge_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, 0, -1, 0}) - ); - expect_vec( - i32x4_ge_u((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((i32x4){-1, -1, 0, 0}) - ); - - // f32x4 comparisons - expect_vec( - f32x4_eq((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){-1, 0, 0, 0}) - ); - expect_vec( - f32x4_ne((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){0, -1, -1, -1}) - ); - expect_vec( - f32x4_lt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){0, -1, 0, -1}) - ); - expect_vec( - f32x4_gt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){0, 0, -1, 0}) - ); - expect_vec( - f32x4_le((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((i32x4){-1, -1, 0, -1}) - ); - expect_vec( - f32x4_ge((f32x4){0, -1, 1, 
0}, (f32x4){0, 0, -1, 1}), ((i32x4){-1, 0, -1, 0}) - ); - expect_vec( - f32x4_eq((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((i32x4){0, 0, 0, -1}) - ); - expect_vec( - f32x4_ne((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((i32x4){-1, -1, -1, 0}) - ); - expect_vec( - f32x4_lt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((i32x4){0, 0, 0, 0}) - ); - expect_vec( - f32x4_gt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((i32x4){0, 0, 0, 0}) - ); - expect_vec( - f32x4_le((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((i32x4){0, 0, 0, -1}) - ); - expect_vec( - f32x4_ge((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((i32x4){0, 0, 0, -1}) - ); - expect_vec( - f32x4_eq((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((i32x4){0, 0, 0, 0}) - ); - expect_vec( - f32x4_ne((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((i32x4){-1, -1, -1, -1}) - ); - expect_vec( - f32x4_lt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((i32x4){-1, -1, 0, 0}) - ); - expect_vec( - f32x4_gt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((i32x4){0, 0, 0, 0}) - ); - expect_vec( - f32x4_le((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((i32x4){-1, -1, 0, 0}) - ); - expect_vec( - f32x4_ge((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((i32x4){0, 0, 0, 0}) - ); - - // f64x2 comparisons - expect_vec(f64x2_eq((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){-1, 0})); - expect_vec(f64x2_ne((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){0, -1})); - expect_vec(f64x2_lt((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){0, 0})); - expect_vec(f64x2_gt((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){0, -1})); - expect_vec(f64x2_le((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){-1, 0})); - expect_vec(f64x2_ge((f64x2){0, 1}, (f64x2){0, 0}), ((i64x2){-1, -1})); - expect_vec(f64x2_eq((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, 0})); - expect_vec(f64x2_ne((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){-1, -1})); - expect_vec(f64x2_lt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, -1})); - expect_vec(f64x2_gt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, 0})); - expect_vec(f64x2_le((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, -1})); - expect_vec(f64x2_ge((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((i64x2){0, 0})); - - // bitwise operations - expect_vec(i8x16_not((i8x16)(i32x4){0, -1, 0, -1}), (i8x16)((i32x4){-1, 0, -1, 0})); - expect_vec( - i8x16_and((i8x16)(i32x4){0, 0, -1, -1}, (i8x16)(i32x4){0, -1, 0, -1}), - (i8x16)((i32x4){0, 0, 0, -1}) - ); - expect_vec( - i8x16_or((i8x16)(i32x4){0, 0, -1, -1}, (i8x16)(i32x4){0, -1, 0, -1}), - (i8x16)((i32x4){0, -1, -1, -1}) - ); - expect_vec( - i8x16_xor((i8x16)(i32x4){0, 0, -1, -1}, (i8x16)(i32x4){0, -1, 0, -1}), - (i8x16)((i32x4){0, -1, -1, 0}) - ); - expect_vec( - i8x16_bitselect( - (i8x16)(i32x4){0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA}, - (i8x16)(i32x4){0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB}, - (i8x16)(i32x4){0xF0F0F0F0, 0xFFFFFFFF, 0x00000000, 0xFF00FF00} - ), - (i8x16)((i32x4){0xABABABAB, 0xAAAAAAAA, 0xBBBBBBBB, 0xAABBAABB}) - ); - - // i8x16 arithmetic - expect_vec( - i8x16_neg((i8x16){0, 1, 42, -3, -56, 127, -128, -126, 0, -1, -42, 3, 56, -127, -128, 126}), - ((i8x16){0, -1, -42, 3, 56, -127, -128, 126, 0, 1, 42, -3, -56, 127, 
-128, -126}) - ); - expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 1); - expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 0); - expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 0); - expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_vec( - i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), - ((i8x16){0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128}) - ); - expect_vec( - i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), - ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) - ); - expect_vec( - i8x16_shr_u((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), - ((i8x16){0, 0, 1, 2, 4, 8, 16, 32, 64, 1, 3, 6, 12, 24, 48, 96}) - ); - expect_vec( - i8x16_shr_u((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), - ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) - ); - expect_vec( - i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), - ((i8x16){0, 0, 1, 2, 4, 8, 16, 32, -64, 1, 3, 6, 12, 24, 48, -32}) - ); - expect_vec( - i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), - ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) - ); - expect_vec( - i8x16_add( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){3, 17, 0, 0, 0, 135, 109, 46, 145, 225, 48, 184, 17, 249, 128, 215}) - ); - expect_vec( - i8x16_add_saturate_s( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){3, 17, 0, 128, 0, 135, 109, 46, 127, 225, 48, 184, 17, 249, 127, 215}) - ); - expect_vec( - i8x16_add_saturate_u( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){3, 255, 255, 255, 255, 135, 109, 46, 145, 225, 255, 184, 17, 255, 128, 215}) - ); - expect_vec( - i8x16_sub( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){253, 67, 254, 0, 254, 123, 159, 12, 61, 167, 158, 100, 17, 251, 130, 187}) - ); - expect_vec( - i8x16_sub_saturate_s( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){253, 67, 254, 0, 127, 128, 159, 12, 61, 167, 158, 128, 17, 251, 130, 127}) - ); - expect_vec( - i8x16_sub_saturate_u( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){0, 0, 254, 0, 0, 123, 0, 12, 61, 167, 158, 100, 17, 0, 0, 0}) - ); - expect_vec( - i8x16_mul( - (i8x16){0, 42, 255, 128, 
127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} - ), - ((i8x16){0, 230, 255, 0, 255, 6, 106, 237, 230, 52, 223, 76, 0, 6, 127, 126}) - ); - - // i16x8 arithmetic - expect_vec( - i16x8_neg((i16x8){0, 1, 42, -3, -56, 32767, -32768, 32766}), - ((i16x8){0, -1, -42, 3, 56, -32767, -32768, -32766}) - ); - expect_eq(i16x8_any_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i16x8_any_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 1); - expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 1); - expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_eq(i16x8_all_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i16x8_all_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 0); - expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 0); - expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_vec( - i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), - ((i16x8){0, 16, 32, 256, 512, 4096, 8192, 0}) - ); - expect_vec( - i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), - ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) - ); - expect_vec( - i16x8_shr_u((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), - ((i16x8){0, 4, 8, 64, 128, 1024, 2048, 16384}) - ); - expect_vec( - i16x8_shr_u((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), - ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) - ); - expect_vec( - i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), - ((i16x8){0, 4, 8, 64, 128, 1024, 2048, -16384}) - ); - expect_vec( - i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), - ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) - ); - expect_vec( - i16x8_add( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){768, -255, 0, 0, -30976, 12288, -1792, -32768}) - ); - expect_vec( - i16x8_add_saturate_s( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){768, -255, -32768, 0, -30976, 12288, -1792, 32767}) - ); - expect_vec( - i16x8_add_saturate_u( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){768, -255, -1, -1, -30976, -1, -1, -32768}) - ); - expect_vec( - i16x8_sub( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){-768, -257, 0, -512, 31488, -25088, -1280, 32764}) - ); - expect_vec( - i16x8_sub_saturate_s( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){-768, -257, 0, 32767, -32768, -25088, -1280, 32764}) - ); - expect_vec( - i16x8_sub_saturate_u( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){0, -257, 0, 0, 31488, -25088, 0, 32764}) - ); - expect_vec( - i16x8_mul( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} - ), - ((i16x8){0, -256, 0, 0, 0, 0, 0, -4}) - ); - - // i32x4 arithmetic - expect_vec(i32x4_neg((i32x4){0, 1, 0x80000000, 0x7fffffff}), ((i32x4){0, -1, 0x80000000, 0x80000001})); - expect_eq(i32x4_any_true((i32x4){0, 0, 0, 0}), 0); - expect_eq(i32x4_any_true((i32x4){0, 0, 1, 0}), 1); - expect_eq(i32x4_any_true((i32x4){1, 0, 1, 1}), 1); - 
expect_eq(i32x4_any_true((i32x4){1, 1, 1, 1}), 1); - expect_eq(i32x4_all_true((i32x4){0, 0, 0, 0}), 0); - expect_eq(i32x4_all_true((i32x4){0, 0, 1, 0}), 0); - expect_eq(i32x4_all_true((i32x4){1, 0, 1, 1}), 0); - expect_eq(i32x4_all_true((i32x4){1, 1, 1, 1}), 1); - expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){2, 0x80000000, 0, -2})); - expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); - expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0xc0000000, -1})); - expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); - expect_vec(i32x4_shr_u((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0x40000000, 0x7fffffff})); - expect_vec(i32x4_shr_u((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); - expect_vec(i32x4_add((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 5, 42}), ((i32x4){0, 2, 47, 47})); - expect_vec(i32x4_sub((i32x4){0, 2, 47, 47}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 0x80000001, 5, 42})); - expect_vec(i32x4_mul((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 1, 1764, 25})); - - - // i64x2 arithmetic - expect_vec(i64x2_neg((i64x2){0x8000000000000000, 42}), ((i64x2){0x8000000000000000, -42})); -#ifdef __wasm_unimplemented_simd128__ - expect_eq(i64x2_any_true((i64x2){0, 0}), 0); - expect_eq(i64x2_any_true((i64x2){1, 0}), 1); - expect_eq(i64x2_any_true((i64x2){1, 1}), 1); - expect_eq(i64x2_all_true((i64x2){0, 0}), 0); - expect_eq(i64x2_all_true((i64x2){1, 0}), 0); - expect_eq(i64x2_all_true((i64x2){1, 1}), 1); -#endif // __wasm_unimplemented_simd128__ - expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 1), ((i64x2){2, 0})); - expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); - expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0xc000000000000000})); - expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); - expect_vec(i64x2_shr_u((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0x4000000000000000})); - expect_vec(i64x2_shr_u((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); - expect_vec(i64x2_add((i64x2){0x8000000000000001, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){2, 42})); - expect_vec(i64x2_sub((i64x2){2, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){0x8000000000000001, 42})); - - // f32x4 arithmetic - expect_vec(f32x4_abs((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, NAN, INFINITY, 5})); - expect_vec(f32x4_neg((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, -NAN, INFINITY, -5})); -#ifdef __wasm_unimplemented_simd128__ - expect_vec(f32x4_sqrt((f32x4){-0., NAN, INFINITY, 4}), ((f32x4){-0., NAN, INFINITY, 2})); -#endif // __wasm_unimplemented_simd128__ - expect_vec(f32x4_add((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 43})); - expect_vec(f32x4_sub((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, -INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 41})); - expect_vec(f32x4_mul((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 2}), ((f32x4){NAN, -NAN, INFINITY, 84})); - expect_vec(f32x4_div((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, 2, 2}), ((f32x4){NAN, -NAN, INFINITY, 21})); - // expect_vec(f32x4_min((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){-0., -0., NAN, NAN})); - // 
expect_vec(f32x4_max((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){0, 0, NAN, NAN})); - - // f64x2 arithmetic -#ifdef __wasm_unimplemented_simd128__ - expect_vec(f64x2_abs((f64x2){-0., NAN}), ((f64x2){0, NAN})); - expect_vec(f64x2_abs((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, 5})); -#endif // __wasm_unimplemented_simd128__ - expect_vec(f64x2_neg((f64x2){-0., NAN}), ((f64x2){0, -NAN})); - expect_vec(f64x2_neg((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, -5})); -#ifdef __wasm_unimplemented_simd128__ - expect_vec(f64x2_sqrt((f64x2){-0., NAN}), ((f64x2){-0., NAN})); - expect_vec(f64x2_sqrt((f64x2){INFINITY, 4}), ((f64x2){INFINITY, 2})); -#endif // __wasm_unimplemented_simd128__ - expect_vec(f64x2_add((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_add((f64x2){INFINITY, 42}, (f64x2){INFINITY, 1}), ((f64x2){INFINITY, 43})); - expect_vec(f64x2_sub((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_sub((f64x2){INFINITY, 42}, (f64x2){-INFINITY, 1}), ((f64x2){INFINITY, 41})); - expect_vec(f64x2_mul((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_mul((f64x2){INFINITY, 42}, (f64x2){INFINITY, 2}), ((f64x2){INFINITY, 84})); - expect_vec(f64x2_div((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_div((f64x2){INFINITY, 42}, (f64x2){2, 2}), ((f64x2){INFINITY, 21})); -#ifdef __wasm_unimplemented_simd128__ - expect_vec(f64x2_min((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){-0., -0})); - expect_vec(f64x2_min((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); - expect_vec(f64x2_max((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){0, 0})); - expect_vec(f64x2_max((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); -#endif // __wasm_unimplemented_simd128__ - - // conversions - expect_vec(i32x4_trunc_s_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 2147483647, -2147483648ll})); - expect_vec(i32x4_trunc_u_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 4294967295ull, 0})); -#ifdef __wasm_unimplemented_simd128__ - expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); - expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){9223372036854775807ll, -9223372036854775807ll - 1})); - expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); - expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){18446744073709551615ull, 0})); -#endif // __wasm_unimplemented_simd128__ - expect_vec(f32x4_convert_s_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, -1, 2147483648., -2147483648.})); - expect_vec(f32x4_convert_u_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, 4294967296., 2147483648., 2147483648.})); - expect_vec(f64x2_convert_s_i64x2((i64x2){0, -1}), ((f64x2){0, -1})); - expect_vec(f64x2_convert_s_i64x2((i64x2){9223372036854775807, -9223372036854775807 - 1}), ((f64x2){9223372036854775807., -9223372036854775808.})); - expect_vec(f64x2_convert_u_i64x2((i64x2){0, -1}), ((f64x2){0, 18446744073709551616.})); - expect_vec(f64x2_convert_u_i64x2((i64x2){9223372036854775807 , -9223372036854775808.}), ((f64x2){9223372036854775807., 9223372036854775808.})); - - if (failures == 0) { - printf("Success!\n"); - } else { - printf("Failed :(\n"); - } -} +#include +#include +#include +#include +#include + +#define TESTFN EMSCRIPTEN_KEEPALIVE __attribute__((noinline)) + +i8x16 TESTFN i8x16_load(i8x16 *ptr) { + return (i8x16) wasm_v128_load(ptr); 
+}
+void TESTFN i8x16_store(i8x16 *ptr, i8x16 vec) {
+  wasm_v128_store(ptr, vec);
+}
+v128 TESTFN i32x4_const(void) {
+  return wasm_v128_const(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
+}
+i8x16 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) {
+  return __builtin_shufflevector(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31);
+}
+i32x4 TESTFN i32x4_shuffle_reverse(i32x4 vec) {
+  return __builtin_shufflevector(vec, vec, 3, 2, 1, 0);
+}
+i8x16 TESTFN i8x16_splat(int32_t x) {
+  return wasm_i8x16_splat(x);
+}
+int32_t TESTFN i8x16_extract_lane_s_first(i8x16 vec) {
+  return wasm_i8x16_extract_lane(vec, 0);
+}
+int32_t TESTFN i8x16_extract_lane_s_last(i8x16 vec) {
+  return wasm_i8x16_extract_lane(vec, 15);
+}
+#ifdef __wasm_unimplemented_simd128__
+uint32_t TESTFN i8x16_extract_lane_u_first(i8x16 vec) {
+  return wasm_u8x16_extract_lane(vec, 0);
+}
+uint32_t TESTFN i8x16_extract_lane_u_last(i8x16 vec) {
+  return wasm_u8x16_extract_lane(vec, 15);
+}
+#endif // __wasm_unimplemented_simd128__
+i8x16 TESTFN i8x16_replace_lane_first(i8x16 vec, int32_t val) {
+  return wasm_i8x16_replace_lane(vec, 0, val);
+}
+i8x16 TESTFN i8x16_replace_lane_last(i8x16 vec, int32_t val) {
+  return wasm_i8x16_replace_lane(vec, 15, val);
+}
+i16x8 TESTFN i16x8_splat(int32_t x) {
+  return wasm_i16x8_splat(x);
+}
+int32_t TESTFN i16x8_extract_lane_s_first(i16x8 vec) {
+  return wasm_i16x8_extract_lane(vec, 0);
+}
+int32_t TESTFN i16x8_extract_lane_s_last(i16x8 vec) {
+  return wasm_i16x8_extract_lane(vec, 7);
+}
+#ifdef __wasm_unimplemented_simd128__
+int32_t TESTFN i16x8_extract_lane_u_first(i16x8 vec) {
+  return wasm_u16x8_extract_lane(vec, 0);
+}
+int32_t TESTFN i16x8_extract_lane_u_last(i16x8 vec) {
+  return wasm_u16x8_extract_lane(vec, 7);
+}
+#endif // __wasm_unimplemented_simd128__
+i16x8 TESTFN i16x8_replace_lane_first(i16x8 vec, int32_t val) {
+  return wasm_i16x8_replace_lane(vec, 0, val);
+}
+i16x8 TESTFN i16x8_replace_lane_last(i16x8 vec, int32_t val) {
+  return wasm_i16x8_replace_lane(vec, 7, val);
+}
+i32x4 TESTFN i32x4_splat(int32_t x) {
+  return wasm_i32x4_splat(x);
+}
+int32_t TESTFN i32x4_extract_lane_first(i32x4 vec) {
+  return wasm_i32x4_extract_lane(vec, 0);
+}
+int32_t TESTFN i32x4_extract_lane_last(i32x4 vec) {
+  return wasm_i32x4_extract_lane(vec, 3);
+}
+i32x4 TESTFN i32x4_replace_lane_first(i32x4 vec, int32_t val) {
+  return wasm_i32x4_replace_lane(vec, 0, val);
+}
+i32x4 TESTFN i32x4_replace_lane_last(i32x4 vec, int32_t val) {
+  return wasm_i32x4_replace_lane(vec, 3, val);
+}
+i64x2 TESTFN i64x2_splat(int64_t x) {
+  return wasm_i64x2_splat(x);
+}
+#ifdef __wasm_unimplemented_simd128__
+int64_t TESTFN i64x2_extract_lane_first(i64x2 vec) {
+  return wasm_i64x2_extract_lane(vec, 0);
+}
+int64_t TESTFN i64x2_extract_lane_last(i64x2 vec) {
+  return wasm_i64x2_extract_lane(vec, 1);
+}
+i64x2 TESTFN i64x2_replace_lane_first(i64x2 vec, int64_t val) {
+  return wasm_i64x2_replace_lane(vec, 0, val);
+}
+i64x2 TESTFN i64x2_replace_lane_last(i64x2 vec, int64_t val) {
+  return wasm_i64x2_replace_lane(vec, 1, val);
+}
+#endif // __wasm_unimplemented_simd128__
+f32x4 TESTFN f32x4_splat(float x) {
+  return wasm_f32x4_splat(x);
+}
+float TESTFN f32x4_extract_lane_first(f32x4 vec) {
+  return wasm_f32x4_extract_lane(vec, 0);
+}
+float TESTFN f32x4_extract_lane_last(f32x4 vec) {
+  return wasm_f32x4_extract_lane(vec, 3);
+}
+f32x4 TESTFN f32x4_replace_lane_first(f32x4 vec, float val) {
+  return wasm_f32x4_replace_lane(vec, 0, val);
+}
+f32x4 TESTFN f32x4_replace_lane_last(f32x4 vec, float val) {
+  return
wasm_f32x4_replace_lane(vec, 3, val); +} +#ifdef __wasm_unimplemented_simd128__ +f64x2 TESTFN f64x2_splat(int64_t x) { + return wasm_f64x2_splat((double ) x); +} +double TESTFN f64x2_extract_lane_first(f64x2 vec) { + return wasm_f64x2_extract_lane(vec, 0); +} +double TESTFN f64x2_extract_lane_last(f64x2 vec) { + return wasm_f64x2_extract_lane(vec, 1); +} +f64x2 TESTFN f64x2_replace_lane_first(f64x2 vec, double val) { + return wasm_f64x2_replace_lane(vec, 0, val); +} +f64x2 TESTFN f64x2_replace_lane_last(f64x2 vec, double val) { + return wasm_f64x2_replace_lane(vec, 1, val); +} +#endif // __wasm_unimplemented_simd128__ +u8x16 TESTFN i8x16_eq(i8x16 x, i8x16 y) { + return wasm_i8x16_eq(x, y); +} +u8x16 TESTFN i8x16_ne(i8x16 x, i8x16 y) { + return wasm_i8x16_ne(x, y); +} +u8x16 TESTFN i8x16_lt_s(i8x16 x, i8x16 y) { + return wasm_i8x16_lt(x, y); +} +u8x16 TESTFN i8x16_lt_u(u8x16 x, u8x16 y) { + return wasm_u8x16_lt(x, y); +} +u8x16 TESTFN i8x16_gt_s(i8x16 x, i8x16 y) { + return wasm_i8x16_gt(x, y); +} +u8x16 TESTFN i8x16_gt_u(u8x16 x, u8x16 y) { + return wasm_u8x16_gt(x,y); +} +u8x16 TESTFN i8x16_le_s(i8x16 x, i8x16 y) { + return wasm_i8x16_le(x,y); +} +u8x16 TESTFN u8x16_le_u(u8x16 x, u8x16 y) { + return wasm_u8x16_le(x, y); +} +u8x16 TESTFN i8x16_ge_s(i8x16 x, i8x16 y) { + return wasm_i8x16_ge(x, y); +} +u8x16 TESTFN i8x16_ge_u(u8x16 x, u8x16 y) { + return wasm_u8x16_ge(x, y); +} +u16x8 TESTFN i16x8_eq(i16x8 x, i16x8 y) { + return wasm_i16x8_eq(x,y); +} +u16x8 TESTFN i16x8_ne(i16x8 x, i16x8 y) { + return wasm_i16x8_ne(x,y); +} +u16x8 TESTFN i16x8_lt_s(i16x8 x, i16x8 y) { + return wasm_i16x8_lt(x,y); +} +u16x8 TESTFN i16x8_lt_u(u16x8 x, u16x8 y) { + return wasm_u16x8_lt(x,y); +} +u16x8 TESTFN i16x8_gt_s(i16x8 x, i16x8 y) { + return wasm_i16x8_gt(x,y); +} +u16x8 TESTFN i16x8_gt_u(u16x8 x, u16x8 y) { + return wasm_u16x8_gt(x,y); +} +u16x8 TESTFN i16x8_le_s(i16x8 x, i16x8 y) { + return wasm_i16x8_le(x, y); +} +u16x8 TESTFN i16x8_le_u(u16x8 x, u16x8 y) { + return wasm_u16x8_le(x, y); +} +u16x8 TESTFN i16x8_ge_s(i16x8 x, i16x8 y) { + return wasm_i16x8_ge(x, y); +} +u16x8 TESTFN i16x8_ge_u(u16x8 x, u16x8 y) { + return wasm_u16x8_ge(x, y); +} +u32x4 TESTFN i32x4_eq(i32x4 x, i32x4 y) { + return wasm_i32x4_eq(x, y); +} +u32x4 TESTFN i32x4_ne(i32x4 x, i32x4 y) { + return wasm_i32x4_ne(x, y); +} +u32x4 TESTFN i32x4_lt_s(i32x4 x, i32x4 y) { + return wasm_i32x4_lt(x, y); +} +u32x4 TESTFN i32x4_lt_u(u32x4 x, u32x4 y) { + return wasm_u32x4_lt(x, y); +} +u32x4 TESTFN i32x4_gt_s(i32x4 x, i32x4 y) { + return wasm_i32x4_gt(x, y); +} +u32x4 TESTFN i32x4_gt_u(u32x4 x, u32x4 y) { + return wasm_u32x4_gt(x, y); +} +u32x4 TESTFN i32x4_le_s(i32x4 x, i32x4 y) { + return wasm_i32x4_le(x, y); +} +u32x4 TESTFN i32x4_le_u(u32x4 x, u32x4 y) { + return wasm_u32x4_le(x, y); +} +u32x4 TESTFN i32x4_ge_s(i32x4 x, i32x4 y) { + return wasm_i32x4_ge(x, y); +} +u32x4 TESTFN i32x4_ge_u(u32x4 x, u32x4 y) { + return wasm_u32x4_ge(x, y); +} +u32x4 TESTFN f32x4_eq(f32x4 x, f32x4 y) { + return wasm_f32x4_eq(x,y); +} +u32x4 TESTFN f32x4_ne(f32x4 x, f32x4 y) { + return wasm_f32x4_ne(x, y); +} +u32x4 TESTFN f32x4_lt(f32x4 x, f32x4 y) { + return wasm_f32x4_lt(x, y); +} +u32x4 TESTFN f32x4_gt(f32x4 x, f32x4 y) { + return wasm_f32x4_gt(x,y); +} +u32x4 TESTFN f32x4_le(f32x4 x, f32x4 y) { + return wasm_f32x4_le(x, y); +} +u32x4 TESTFN f32x4_ge(f32x4 x, f32x4 y) { + return wasm_f32x4_ge(x, y); +} +#ifdef __wasm_undefined_simd128__ +u64x2 TESTFN f64x2_eq(f64x2 x, f64x2 y) { + return wasm_f64x2_eq(x,y); +} +u64x2 TESTFN f64x2_ne(f64x2 x, f64x2 y) { + 
return wasm_f64x2_ne(x,y); +} +u64x2 TESTFN f64x2_lt(f64x2 x, f64x2 y) { + return wasm_f64x2_lt(x,y); +} +u64x2 TESTFN f64x2_gt(f64x2 x, f64x2 y) { + return wasm_f64x2_gt(x, y); +} +u64x2 TESTFN f64x2_le(f64x2 x, f64x2 y) { + return wasm_f64x2_le(x, y); +} +u64x2 TESTFN f64x2_ge(f64x2 x, f64x2 y) { + return wasm_f64x2_ge(x, y); +} +#endif // __wasm_undefined_simd128__ +v128 TESTFN v128_not(v128 vec) { + return wasm_v128_not(vec); +} +v128 TESTFN v128_and(v128 x, v128 y) { + return wasm_v128_and(x, y); +} +v128 TESTFN v128_or(v128 x, v128 y) { + return wasm_v128_or(x,y); +} +v128 TESTFN v128_xor(v128 x, v128 y) { + return wasm_v128_xor(x,y); +} +v128 TESTFN v128_bitselect(v128 x, v128 y, v128 cond) { + return wasm_v128_bitselect(x, y, cond); +} +i8x16 TESTFN i8x16_neg(i8x16 vec) { + return wasm_i8x16_neg(vec); +} +int32_t TESTFN i8x16_any_true(i8x16 vec) { + return wasm_i8x16_any_true(vec); +} +int32_t TESTFN i8x16_all_true(i8x16 vec) { + return wasm_i8x16_all_true(vec); +} +i8x16 TESTFN i8x16_shl(i8x16 vec, int32_t shift) { + return wasm_i8x16_shl(vec, shift); +} +i8x16 TESTFN i8x16_shr_s(i8x16 vec, int32_t shift) { + return wasm_i8x16_shr(vec, shift); +} +u8x16 TESTFN i8x16_shr_u(u8x16 vec, int32_t shift) { + return wasm_u8x16_shr(vec, shift); +} +i8x16 TESTFN i8x16_add(i8x16 x, i8x16 y) { + return wasm_i8x16_add(x,y); +} +i8x16 TESTFN i8x16_add_saturate_s(i8x16 x, i8x16 y) { + return wasm_i8x16_add_saturate(x, y); +} +u8x16 TESTFN i8x16_add_saturate_u(u8x16 x, u8x16 y) { + return wasm_u8x16_add_saturate(x, y); +} +i8x16 TESTFN i8x16_sub(i8x16 x, i8x16 y) { + return wasm_i8x16_sub(x,y); +} +i8x16 TESTFN i8x16_sub_saturate_s(i8x16 x, i8x16 y) { + return wasm_i8x16_sub_saturate(x, y); +} +u8x16 TESTFN i8x16_sub_saturate_u(u8x16 x, u8x16 y) { + return wasm_u8x16_sub_saturate(x, y); +} +i8x16 TESTFN i8x16_mul(i8x16 x, i8x16 y) { + return wasm_i8x16_mul(x, y); +} +i16x8 TESTFN i16x8_neg(i16x8 vec) { + return wasm_i16x8_neg(vec); +} +bool TESTFN i16x8_any_true(i16x8 vec) { + return wasm_i16x8_any_true(vec); +} +int32_t TESTFN i16x8_all_true(i16x8 vec) { + return wasm_i16x8_all_true(vec); +} +i16x8 TESTFN i16x8_shl(i16x8 vec, int32_t shift) { + return wasm_i16x8_shl(vec, shift); +} +i16x8 TESTFN i16x8_shr_s(i16x8 vec, int32_t shift) { + return wasm_i16x8_shr(vec, shift); +} +u16x8 TESTFN i16x8_shr_u(u16x8 vec, int32_t shift) { + return wasm_u16x8_shr(vec, shift); +} +i16x8 TESTFN i16x8_add(i16x8 x, i16x8 y) { + return wasm_i16x8_add(x, y); +} +i16x8 TESTFN i16x8_add_saturate_s(i16x8 x, i16x8 y) { + return wasm_i16x8_add_saturate(x, y); +} +u16x8 TESTFN i16x8_add_saturate_u(u16x8 x, u16x8 y) { + return wasm_u16x8_add_saturate(x, y); +} +i16x8 TESTFN i16x8_sub(i16x8 x, i16x8 y) { + return wasm_i16x8_sub(x, y); +} +i16x8 TESTFN i16x8_sub_saturate_s(i16x8 x, i16x8 y) { + return wasm_i16x8_sub_saturate(x,y); +} +u16x8 TESTFN i16x8_sub_saturate_u(u16x8 x, u16x8 y) { + return wasm_u16x8_sub_saturate(x, y); +} +i16x8 TESTFN i16x8_mul(i16x8 x, i16x8 y) { + return wasm_i16x8_mul(x, y); +} +i32x4 TESTFN i32x4_neg(i32x4 vec) { + return wasm_i32x4_neg(vec); +} +int32_t TESTFN i32x4_any_true(i32x4 vec) { + return wasm_i32x4_any_true(vec); +} +int32_t TESTFN i32x4_all_true(i32x4 vec) { + return wasm_i32x4_all_true(vec); +} +i32x4 TESTFN i32x4_shl(i32x4 vec, int32_t shift) { + return wasm_i32x4_shl(vec, shift); +} +i32x4 TESTFN i32x4_shr_s(i32x4 vec, int32_t shift) { + return wasm_i32x4_shr(vec, shift); +} +u32x4 TESTFN i32x4_shr_u(u32x4 vec, int32_t shift) { + return wasm_u32x4_shr(vec, shift); +} +i32x4 
TESTFN i32x4_add(i32x4 x, i32x4 y) { + return wasm_i32x4_add(x, y); +} +i32x4 TESTFN i32x4_sub(i32x4 x, i32x4 y) { + return wasm_i32x4_sub(x, y); +} +i32x4 TESTFN i32x4_mul(i32x4 x, i32x4 y) { + return wasm_i32x4_mul(x, y); +} +i64x2 TESTFN i64x2_neg(i64x2 vec) { + return wasm_i64x2_neg(vec); +} +#ifdef __wasm_unimplemented_simd128__ +bool TESTFN i64x2_any_true(i64x2 vec) { + return wasm_i64x2_any_true(vec); +} +bool TESTFN i64x2_all_true(i64x2 vec) { + return wasm_i64x2_all_true(vec); +} +#endif // __wasm_unimplemented_simd128__ +i64x2 TESTFN i64x2_shl(i64x2 vec, int32_t shift) { + return wasm_i64x2_shl(vec, shift); +} + +i64x2 TESTFN i64x2_shr_s(i64x2 vec, int32_t shift) { + return wasm_i64x2_shr(vec, shift); +} +u64x2 TESTFN i64x2_shr_u(u64x2 vec, int32_t shift) { + return wasm_u64x2_shr(vec, shift); +} +i64x2 TESTFN i64x2_add(i64x2 x, i64x2 y) { + return wasm_i64x2_add(x, y); +} +i64x2 TESTFN i64x2_sub(i64x2 x, i64x2 y) { + return wasm_i64x2_sub(x, y); +} +f32x4 TESTFN f32x4_abs(f32x4 vec) { + return wasm_f32x4_abs(vec); +} +f32x4 TESTFN f32x4_neg(f32x4 vec) { + return wasm_f32x4_neg(vec); +} +#ifdef __wasm_unimplemented_simd128__ +f32x4 TESTFN f32x4_sqrt(f32x4 vec) { + return wasm_f32x4_sqrt(vec); +} +#endif // __wasm_unimplemented_simd128__ +f32x4 TESTFN f32x4_add(f32x4 x, f32x4 y) { + return wasm_f32x4_add(x,y); +} +f32x4 TESTFN f32x4_sub(f32x4 x, f32x4 y) { + return wasm_f32x4_sub(x, y); +} +f32x4 TESTFN f32x4_mul(f32x4 x, f32x4 y) { + return wasm_f32x4_mul(x, y); +} +f32x4 TESTFN f32x4_div(f32x4 x, f32x4 y) { + return wasm_f32x4_div(x, y); +} +f32x4 TESTFN f32x4_min(f32x4 x, f32x4 y) { + return wasm_f32x4_min(x, y); +} +f32x4 TESTFN f32x4_max(f32x4 x, f32x4 y) { + return wasm_f32x4_max(x, y); +} +#ifdef __wasm_unimplemented_simd128__ +f64x2 TESTFN f64x2_abs(f64x2 vec) { + return __builtin_wasm_abs_f64x2(vec); +} +#endif // __wasm_unimplemented_simd128__ +f64x2 TESTFN f64x2_neg(f64x2 vec) { + return -vec; +} +#ifdef __wasm_unimplemented_simd128__ +f64x2 TESTFN f64x2_sqrt(f64x2 vec) { + return __builtin_wasm_sqrt_f64x2(vec); +} +#endif // __wasm_unimplemented_simd128__ +f64x2 TESTFN f64x2_add(f64x2 x, f64x2 y) { + return x + y; +} +f64x2 TESTFN f64x2_sub(f64x2 x, f64x2 y) { + return x - y; +} +f64x2 TESTFN f64x2_mul(f64x2 x, f64x2 y) { + return x * y; +} +f64x2 TESTFN f64x2_div(f64x2 x, f64x2 y) { + return x / y; +} +#ifdef __wasm_unimplemented_simd128__ +f64x2 TESTFN f64x2_min(f64x2 x, f64x2 y) { + return __builtin_wasm_min_f64x2(x, y); +} +f64x2 TESTFN f64x2_max(f64x2 x, f64x2 y) { + return __builtin_wasm_max_f64x2(x, y); +} +#endif // __wasm_unimplemented_simd128__ +i32x4 TESTFN i32x4_trunc_s_f32x4_sat(f32x4 vec) { + return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(vec); +} +i32x4 TESTFN i32x4_trunc_u_f32x4_sat(f32x4 vec) { + return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(vec); +} +#ifdef __wasm_unimplemented_simd128__ +i64x2 TESTFN i64x2_trunc_s_f64x2_sat(f64x2 vec) { + return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(vec); +} +i64x2 TESTFN i64x2_trunc_u_f64x2_sat(f64x2 vec) { + return __builtin_wasm_trunc_saturate_u_i64x2_f64x2(vec); +} +#endif // __wasm_unimplemented_simd128__ +f32x4 TESTFN f32x4_convert_s_i32x4(i32x4 vec) { + return __builtin_convertvector(vec, f32x4); +} +f32x4 TESTFN f32x4_convert_u_i32x4(i32x4 vec) { + return __builtin_convertvector((u32x4)vec, f32x4); +} +f64x2 TESTFN f64x2_convert_s_i64x2(i64x2 vec) { + return __builtin_convertvector(vec, f64x2); +} +f64x2 TESTFN f64x2_convert_u_i64x2(i64x2 vec) { + return __builtin_convertvector((u64x2)vec, 
f64x2); +} + +static int failures = 0; + +#define formatter(x) _Generic((x), \ + int8_t: "%d", \ + uint8_t: "%d", \ + int16_t: "%d", \ + uint16_t: "%d", \ + int32_t: "%d", \ + uint32_t: "%d", \ + int64_t: "%ld", \ + uint64_t: "%ld", \ + bool: "%d", \ + float: "%f", \ + double: "%f" \ + ) + +#define err(x) fprintf(stderr, formatter(x), x) + +#define eq(a, b) ({ \ + bool anan = _Generic((a), \ + float: isnan(a), \ + double: isnan(a), \ + default: false); \ + bool bnan = _Generic((b), \ + float: isnan(b), \ + double: isnan(b), \ + default: false); \ + ((anan && bnan) || (!anan && a == b)); \ + }) + +#define expect_eq(_a, _b) __extension__({ \ + __typeof__(_a) a = (_a), b = (_b); \ + if (!eq(a, b)) { \ + failures++; \ + fprintf(stderr, "line %d: expected ", __LINE__); \ + err(b); \ + fprintf(stderr, ", got "); \ + err(a); \ + fprintf(stderr, "\n"); \ + } \ + }) + +#define expect_vec(_a, _b) __extension__({ \ + __typeof__(_a) a = (_a), b = (_b); \ + bool err = false; \ + size_t lanes = _Generic((a), \ + u8x16: 16, \ + i8x16: 16, \ + i16x8: 8, \ + u16x8: 8, \ + i32x4: 4, \ + u32x4: 4, \ + i64x2: 2, \ + u64x2: 2, \ + f32x4: 4, \ + f64x2: 2); \ + for (size_t i = 0; i < lanes; i++) { \ + if (!eq(a[i], b[i])) { \ + err = true; \ + break; \ + } \ + } \ + if (err) { \ + failures++; \ + fprintf(stderr, "line %d: expected {", __LINE__); \ + for (size_t i = 0; i < lanes - 1; i++) { \ + err(b[i]); \ + fprintf(stderr, ", "); \ + } \ + err(b[lanes - 1]); \ + fprintf(stderr, "}, got {"); \ + for (size_t i = 0; i < lanes - 1; i++) { \ + err(a[i]); \ + fprintf(stderr, ", "); \ + } \ + err(a[lanes - 1]); \ + fprintf(stderr, "}\n"); \ + } \ + }) + +int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { + { + i8x16 vec = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; + expect_vec(i8x16_load(&vec), + (__extension__(i8x16){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})); + i8x16_store(&vec, __extension__(i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}); + expect_vec(i8x16_load(&vec), + (__extension__(i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7})); + } + expect_vec(i32x4_const(), ((v128)((i32x4){1, 2, 3, 4}))); + expect_vec( + i8x16_shuffle_interleave_bytes( + (i8x16){1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0}, + (i8x16){0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16} + ), + ((i8x16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + ); + expect_vec(i32x4_shuffle_reverse((i32x4){1, 2, 3, 4}), ((i32x4){4, 3, 2, 1})); + + // i8x16 lane accesses + expect_vec(i8x16_splat(5), ((i8x16){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5})); + expect_vec(i8x16_splat(257), ((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); + expect_eq(i8x16_extract_lane_s_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), -1); + expect_eq(i8x16_extract_lane_s_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), -1); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i8x16_extract_lane_u_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 255); + expect_eq(i8x16_extract_lane_u_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), 255); +#endif // __wasm_unimplemented_simd128__ + expect_vec( + i8x16_replace_lane_first((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), + ((i8x16){7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + ); + expect_vec( + i8x16_replace_lane_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), + ((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}) + ); + + // i16x8 lane accesses + 
expect_vec(i16x8_splat(5), ((i16x8){5, 5, 5, 5, 5, 5, 5, 5})); + expect_vec(i16x8_splat(65537), ((i16x8){1, 1, 1, 1, 1, 1, 1, 1})); + expect_eq(i16x8_extract_lane_s_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), -1); + expect_eq(i16x8_extract_lane_s_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), -1); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i16x8_extract_lane_u_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), 65535); + expect_eq(i16x8_extract_lane_u_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), 65535); +#endif // __wasm_unimplemented_simd128__ + expect_vec(i16x8_replace_lane_first((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){7, 0, 0, 0, 0, 0, 0, 0})); + expect_vec(i16x8_replace_lane_last((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){0, 0, 0, 0, 0, 0, 0, 7})); + + // i32x4 lane accesses + expect_vec(i32x4_splat(-5), ((i32x4){-5, -5, -5, -5})); + expect_eq(i32x4_extract_lane_first((i32x4){-5, 0, 0, 0}), -5); + expect_eq(i32x4_extract_lane_last((i32x4){0, 0, 0, -5}), -5); + expect_vec(i32x4_replace_lane_first((i32x4){0, 0, 0, 0}, 53), ((i32x4){53, 0, 0, 0})); + expect_vec(i32x4_replace_lane_last((i32x4){0, 0, 0, 0}, 53), ((i32x4){0, 0, 0, 53})); + + // i64x2 lane accesses + expect_vec(i64x2_splat(-5), ((i64x2){-5, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i64x2_extract_lane_first((i64x2){-5, 0}), -5); + expect_eq(i64x2_extract_lane_last((i64x2){0, -5}), -5); + expect_vec(i64x2_replace_lane_first((i64x2){0, 0}, 53), ((i64x2){53, 0})); + expect_vec(i64x2_replace_lane_last((i64x2){0, 0}, 53), ((i64x2){0, 53})); +#endif // __wasm_unimplemented_simd128__ + + // f32x4 lane accesses + expect_vec(f32x4_splat(-5), ((f32x4){-5, -5, -5, -5})); + expect_eq(f32x4_extract_lane_first((f32x4){-5, 0, 0, 0}), -5); + expect_eq(f32x4_extract_lane_last((f32x4){0, 0, 0, -5}), -5); + expect_vec(f32x4_replace_lane_first((f32x4){0, 0, 0, 0}, 53), ((f32x4){53, 0, 0, 0})); + expect_vec(f32x4_replace_lane_last((f32x4){0, 0, 0, 0}, 53), ((f32x4){0, 0, 0, 53})); + +#ifdef __wasm_unimplemented_simd128__ + // f64x2 lane accesses + expect_vec(f64x2_splat(-5), ((f64x2){-5, -5})); + expect_eq(f64x2_extract_lane_first((f64x2){-5, 0}), -5); + expect_eq(f64x2_extract_lane_last((f64x2){0, -5}), -5); + expect_vec(f64x2_replace_lane_first((f64x2){0, 0}, 53), ((f64x2){53, 0})); + expect_vec(f64x2_replace_lane_last((f64x2){0, 0}, 53), ((f64x2){0, 53})); +#endif // __wasm_unimplemented_simd128__ + + // i8x16 comparisons + expect_vec( + i8x16_eq( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){-1, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0}) + ); + expect_vec( + i8x16_ne( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1}) + ); + expect_vec( + i8x16_lt_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){0, 0, 0, -1, 0, -1, -1, 0, 0, 0, -1, 0, 0, -1, -1, 0}) + ); + expect_vec( + i8x16_lt_u( + (u8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (u8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1}) + ); + expect_vec( + i8x16_gt_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 
42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1}) + ); + expect_vec( + i8x16_gt_u( + (u8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (u8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0}) + ); + expect_vec( + i8x16_le_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){-1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, 0}) + ); + // bugs.chromium.org/p/v8/issues/detail?id=8635 + // expect_vec( + // i8x16_le_u( + // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + // ), + // ((i8x16){-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1}) + // ); + expect_vec( + i8x16_ge_s( + (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + ), + ((u8x16){-1, -1, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, 0, -1}) + ); + // expect_vec( + // i8x16_ge_u( + // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, + // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + // ), + // ((i8x16){-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0}) + // ); + + // i16x8 comparisons + expect_vec( + i16x8_eq( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){-1, 0, 0, 0, 0, 0, 0, 0}) + ); + expect_vec( + i16x8_ne( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){0, -1, -1, -1, -1, -1, -1, -1}) + ); + expect_vec( + i16x8_lt_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){0, 0, 0, -1, 0, -1, 0, -1}) + ); + expect_vec( + i16x8_lt_u( + (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){0, 0, 0, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_gt_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){0, -1, -1, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_gt_u( + (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){0, -1, -1, -1, 0, -1, 0, -1}) + ); + expect_vec( + i16x8_le_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){-1, 0, 0, -1, 0, -1, 0, -1}) + ); + expect_vec( + i16x8_le_u( + (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){-1, 0, 0, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_ge_s( + (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){-1, -1, -1, 0, -1, 0, -1, 0}) + ); + expect_vec( + i16x8_ge_u( + (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, + (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + ), + ((u16x8){-1, -1, -1, -1, 0, -1, 0, -1}) + ); + + // i342x4 comparisons + expect_vec( + i32x4_eq((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, 
-1}), ((u32x4){-1, 0, 0, 0}) + ); + expect_vec( + i32x4_ne((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){0, -1, -1, -1}) + ); + expect_vec( + i32x4_lt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){0, -1, 0, -1}) + ); + expect_vec( + i32x4_lt_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){0, 0, -1, -1}) + ); + expect_vec( + i32x4_gt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){0, 0, -1, 0}) + ); + expect_vec( + i32x4_gt_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){0, -1, 0, 0}) + ); + expect_vec( + i32x4_le_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){-1, -1, 0, -1}) + ); + expect_vec( + i32x4_le_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){-1, 0, -1, -1}) + ); + expect_vec( + i32x4_ge_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){-1, 0, -1, 0}) + ); + expect_vec( + i32x4_ge_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){-1, -1, 0, 0}) + ); + + // f32x4 comparisons + expect_vec( + f32x4_eq((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){-1, 0, 0, 0}) + ); + expect_vec( + f32x4_ne((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){0, -1, -1, -1}) + ); + expect_vec( + f32x4_lt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){0, -1, 0, -1}) + ); + expect_vec( + f32x4_gt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){0, 0, -1, 0}) + ); + expect_vec( + f32x4_le((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){-1, -1, 0, -1}) + ); + expect_vec( + f32x4_ge((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){-1, 0, -1, 0}) + ); + expect_vec( + f32x4_eq((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((u32x4){0, 0, 0, -1}) + ); + expect_vec( + f32x4_ne((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((u32x4){-1, -1, -1, 0}) + ); + expect_vec( + f32x4_lt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((u32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_gt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((u32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_le((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((u32x4){0, 0, 0, -1}) + ); + expect_vec( + f32x4_ge((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), + ((u32x4){0, 0, 0, -1}) + ); + expect_vec( + f32x4_eq((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((u32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_ne((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((u32x4){-1, -1, -1, -1}) + ); + expect_vec( + f32x4_lt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((u32x4){-1, -1, 0, 0}) + ); + expect_vec( + f32x4_gt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((u32x4){0, 0, 0, 0}) + ); + expect_vec( + f32x4_le((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((u32x4){-1, -1, 0, 0}) + ); + expect_vec( + f32x4_ge((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), + ((u32x4){0, 0, 0, 0}) + ); + +#ifdef __wasm_undefined_simd128__ + // f64x2 comparisons + expect_vec(f64x2_eq((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, 0})); + expect_vec(f64x2_ne((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, -1})); + expect_vec(f64x2_lt((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, 0})); + expect_vec(f64x2_gt((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, -1})); + expect_vec(f64x2_le((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, 0})); + 
expect_vec(f64x2_ge((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, -1})); + expect_vec(f64x2_eq((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); + expect_vec(f64x2_ne((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){-1, -1})); + expect_vec(f64x2_lt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, -1})); + expect_vec(f64x2_gt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); + expect_vec(f64x2_le((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, -1})); + expect_vec(f64x2_ge((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); +#endif // __wasm_undefined_simd128__ + + // bitwise operations + expect_vec(v128_not((v128)(i32x4){0, -1, 0, -1}), (v128)((i32x4){-1, 0, -1, 0})); + expect_vec( + v128_and((v128)(i32x4){0, 0, -1, -1}, (v128)(i32x4){0, -1, 0, -1}), + (v128)((i32x4){0, 0, 0, -1}) + ); + expect_vec( + v128_or((v128)(i32x4){0, 0, -1, -1}, (v128)(i32x4){0, -1, 0, -1}), + (v128)((i32x4){0, -1, -1, -1}) + ); + expect_vec( + v128_xor((v128)(i32x4){0, 0, -1, -1}, (v128)(i32x4){0, -1, 0, -1}), + (v128)((i32x4){0, -1, -1, 0}) + ); + expect_vec( + v128_bitselect( + (v128)(i32x4){0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA}, + (v128)(i32x4){0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB}, + (v128)(i32x4){0xF0F0F0F0, 0xFFFFFFFF, 0x00000000, 0xFF00FF00} + ), + (v128)((i32x4){0xABABABAB, 0xAAAAAAAA, 0xBBBBBBBB, 0xAABBAABB}) + ); + + // i8x16 arithmetic + expect_vec( + i8x16_neg((i8x16){0, 1, 42, -3, -56, 127, -128, -126, 0, -1, -42, 3, 56, -127, -128, 126}), + ((i8x16){0, -1, -42, 3, 56, -127, -128, 126, 0, 1, 42, -3, -56, 127, -128, -126}) + ); + expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 1); + expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 0); + expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 0); + expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_vec( + i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), + ((i8x16){0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128}) + ); + expect_vec( + i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), + ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + ); + expect_vec( + i8x16_shr_u((u8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), + ((u8x16){0, 0, 1, 2, 4, 8, 16, 32, 64, 1, 3, 6, 12, 24, 48, 96}) + ); + expect_vec( + i8x16_shr_u((u8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), + ((u8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + ); + expect_vec( + i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), + ((i8x16){0, 0, 1, 2, 4, 8, 16, 32, -64, 1, 3, 6, 12, 24, 48, -32}) + ); + expect_vec( + i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), + ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + ); + expect_vec( + i8x16_add( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 
42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){3, 17, 0, 0, 0, 135, 109, 46, 145, 225, 48, 184, 17, 249, 128, 215}) + ); + expect_vec( + i8x16_add_saturate_s( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){3, 17, 0, 128, 0, 135, 109, 46, 127, 225, 48, 184, 17, 249, 127, 215}) + ); + expect_vec( + i8x16_add_saturate_u( + (u8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (u8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((u8x16){3, 255, 255, 255, 255, 135, 109, 46, 145, 225, 255, 184, 17, 255, 128, 215}) + ); + expect_vec( + i8x16_sub( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){253, 67, 254, 0, 254, 123, 159, 12, 61, 167, 158, 100, 17, 251, 130, 187}) + ); + expect_vec( + i8x16_sub_saturate_s( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){253, 67, 254, 0, 127, 128, 159, 12, 61, 167, 158, 128, 17, 251, 130, 127}) + ); + expect_vec( + i8x16_sub_saturate_u( + (u8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (u8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((u8x16){0, 0, 254, 0, 0, 123, 0, 12, 61, 167, 158, 100, 17, 0, 0, 0}) + ); + expect_vec( + i8x16_mul( + (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, + (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + ), + ((i8x16){0, 230, 255, 0, 255, 6, 106, 237, 230, 52, 223, 76, 0, 6, 127, 126}) + ); + + // i16x8 arithmetic + expect_vec( + i16x8_neg((i16x8){0, 1, 42, -3, -56, 32767, -32768, 32766}), + ((i16x8){0, -1, -42, 3, 56, -32767, -32768, -32766}) + ); + expect_eq(i16x8_any_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i16x8_any_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 1); + expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 1); + expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i16x8_all_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); + expect_eq(i16x8_all_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 0); + expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 0); + expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_vec( + i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), + ((i16x8){0, 16, 32, 256, 512, 4096, 8192, 0}) + ); + expect_vec( + i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), + ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + ); + expect_vec( + i16x8_shr_u((u16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), + ((u16x8){0, 4, 8, 64, 128, 1024, 2048, 16384}) + ); + expect_vec( + i16x8_shr_u((u16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), + ((u16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + ); + expect_vec( + i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), + ((i16x8){0, 4, 8, 64, 128, 1024, 2048, -16384}) + ); + expect_vec( + i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), + ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + ); + expect_vec( + i16x8_add( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){768, -255, 0, 0, -30976, 12288, -1792, -32768}) + ); + 
expect_vec( + i16x8_add_saturate_s( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){768, -255, -32768, 0, -30976, 12288, -1792, 32767}) + ); + expect_vec( + i16x8_add_saturate_u( + (u16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (u16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((u16x8){768, -255, -1, -1, -30976, -1, -1, -32768}) + ); + expect_vec( + i16x8_sub( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){-768, -257, 0, -512, 31488, -25088, -1280, 32764}) + ); + expect_vec( + i16x8_sub_saturate_s( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){-768, -257, 0, 32767, -32768, -25088, -1280, 32764}) + ); + expect_vec( + i16x8_sub_saturate_u( + (u16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (u16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((u16x8){0, -257, 0, 0, 31488, -25088, 0, 32764}) + ); + expect_vec( + i16x8_mul( + (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, + (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + ), + ((i16x8){0, -256, 0, 0, 0, 0, 0, -4}) + ); + + // i32x4 arithmetic + expect_vec(i32x4_neg((i32x4){0, 1, 0x80000000, 0x7fffffff}), ((i32x4){0, -1, 0x80000000, 0x80000001})); + expect_eq(i32x4_any_true((i32x4){0, 0, 0, 0}), 0); + expect_eq(i32x4_any_true((i32x4){0, 0, 1, 0}), 1); + expect_eq(i32x4_any_true((i32x4){1, 0, 1, 1}), 1); + expect_eq(i32x4_any_true((i32x4){1, 1, 1, 1}), 1); + expect_eq(i32x4_all_true((i32x4){0, 0, 0, 0}), 0); + expect_eq(i32x4_all_true((i32x4){0, 0, 1, 0}), 0); + expect_eq(i32x4_all_true((i32x4){1, 0, 1, 1}), 0); + expect_eq(i32x4_all_true((i32x4){1, 1, 1, 1}), 1); + expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){2, 0x80000000, 0, -2})); + expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); + expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0xc0000000, -1})); + expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); + expect_vec(i32x4_shr_u((u32x4){1, 0x40000000, 0x80000000, -1}, 1), ((u32x4){0, 0x20000000, 0x40000000, 0x7fffffff})); + expect_vec(i32x4_shr_u((u32x4){1, 0x40000000, 0x80000000, -1}, 32), ((u32x4){1, 0x40000000, 0x80000000, -1})); + expect_vec(i32x4_add((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 5, 42}), ((i32x4){0, 2, 47, 47})); + expect_vec(i32x4_sub((i32x4){0, 2, 47, 47}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 0x80000001, 5, 42})); + expect_vec(i32x4_mul((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 1, 1764, 25})); + + + // i64x2 arithmetic + expect_vec(i64x2_neg((i64x2){0x8000000000000000, 42}), ((i64x2){0x8000000000000000, -42})); +#ifdef __wasm_unimplemented_simd128__ + expect_eq(i64x2_any_true((i64x2){0, 0}), 0); + expect_eq(i64x2_any_true((i64x2){1, 0}), 1); + expect_eq(i64x2_any_true((i64x2){1, 1}), 1); + expect_eq(i64x2_all_true((i64x2){0, 0}), 0); + expect_eq(i64x2_all_true((i64x2){1, 0}), 0); + expect_eq(i64x2_all_true((i64x2){1, 1}), 1); +#endif // __wasm_unimplemented_simd128__ + expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 1), ((i64x2){2, 0})); + expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); + 
expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0xc000000000000000})); + expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); + expect_vec(i64x2_shr_u((u64x2){1, 0x8000000000000000}, 1), ((u64x2){0, 0x4000000000000000})); + expect_vec(i64x2_shr_u((u64x2){1, 0x8000000000000000}, 64), ((u64x2){1, 0x8000000000000000})); + expect_vec(i64x2_add((i64x2){0x8000000000000001, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){2, 42})); + expect_vec(i64x2_sub((i64x2){2, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){0x8000000000000001, 42})); + + // f32x4 arithmetic + expect_vec(f32x4_abs((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, NAN, INFINITY, 5})); + expect_vec(f32x4_neg((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, -NAN, INFINITY, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f32x4_sqrt((f32x4){-0., NAN, INFINITY, 4}), ((f32x4){-0., NAN, INFINITY, 2})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f32x4_add((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 43})); + expect_vec(f32x4_sub((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, -INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 41})); + expect_vec(f32x4_mul((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 2}), ((f32x4){NAN, -NAN, INFINITY, 84})); + expect_vec(f32x4_div((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, 2, 2}), ((f32x4){NAN, -NAN, INFINITY, 21})); + // expect_vec(f32x4_min((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){-0., -0., NAN, NAN})); + // expect_vec(f32x4_max((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){0, 0, NAN, NAN})); + + // f64x2 arithmetic +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_abs((f64x2){-0., NAN}), ((f64x2){0, NAN})); + expect_vec(f64x2_abs((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, 5})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f64x2_neg((f64x2){-0., NAN}), ((f64x2){0, -NAN})); + expect_vec(f64x2_neg((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, -5})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_sqrt((f64x2){-0., NAN}), ((f64x2){-0., NAN})); + expect_vec(f64x2_sqrt((f64x2){INFINITY, 4}), ((f64x2){INFINITY, 2})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f64x2_add((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_add((f64x2){INFINITY, 42}, (f64x2){INFINITY, 1}), ((f64x2){INFINITY, 43})); + expect_vec(f64x2_sub((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_sub((f64x2){INFINITY, 42}, (f64x2){-INFINITY, 1}), ((f64x2){INFINITY, 41})); + expect_vec(f64x2_mul((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_mul((f64x2){INFINITY, 42}, (f64x2){INFINITY, 2}), ((f64x2){INFINITY, 84})); + expect_vec(f64x2_div((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); + expect_vec(f64x2_div((f64x2){INFINITY, 42}, (f64x2){2, 2}), ((f64x2){INFINITY, 21})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_min((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){-0., -0})); + expect_vec(f64x2_min((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); + expect_vec(f64x2_max((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){0, 0})); + expect_vec(f64x2_max((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); +#endif // __wasm_unimplemented_simd128__ + + // conversions + expect_vec(i32x4_trunc_s_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 
2147483647, -2147483648ll})); + expect_vec(i32x4_trunc_u_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 4294967295ull, 0})); +#ifdef __wasm_unimplemented_simd128__ + expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); + expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){9223372036854775807ll, -9223372036854775807ll - 1})); + expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); + expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){18446744073709551615ull, 0})); +#endif // __wasm_unimplemented_simd128__ + expect_vec(f32x4_convert_s_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, -1, 2147483648., -2147483648.})); + expect_vec(f32x4_convert_u_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, 4294967296., 2147483648., 2147483648.})); + expect_vec(f64x2_convert_s_i64x2((i64x2){0, -1}), ((f64x2){0, -1})); + expect_vec(f64x2_convert_s_i64x2((i64x2){9223372036854775807, -9223372036854775807 - 1}), ((f64x2){9223372036854775807., -9223372036854775808.})); + expect_vec(f64x2_convert_u_i64x2((i64x2){0, -1}), ((f64x2){0, 18446744073709551616.})); + expect_vec(f64x2_convert_u_i64x2((i64x2){9223372036854775807 , -9223372036854775808.}), ((f64x2){9223372036854775807., 9223372036854775808.})); + + if (failures == 0) { + printf("Success!\n"); + } else { + printf("Failed :(\n"); + } +} From cfa587daca43f9390ecec12ebb98f9b1c6b82d27 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 6 May 2019 21:54:26 -0700 Subject: [PATCH 03/16] Fix tests and unimplemented-simd128 build TODO: truncation, conversion, and shufflevector --- system/include/simd128.h | 28 ++++++++++++++-------------- tests/test_wasm_intrinsics_simd.c | 14 +++++++------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 6ddeb07c13923..08ee960ec1ab5 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -39,12 +39,12 @@ static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(v128* mem, v128 a) { } // v128 wasm_v128_constant(...) 
-static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c15, - int8_t c14, int8_t c13, int8_t c12, int8_t c11, int8_t c10, - int8_t c9, int8_t c8, int8_t c7, int8_t c6, int8_t c5, - int8_t c4, int8_t c3, int8_t c2, int8_t c1, int8_t c0) { - return (i8x16){c0, c1, c2, c3, c4, c5, c6, c7, - c8, c9, c10, c11, c12, c13, c14, c15}; +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c0, + int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, + int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, + int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + return (v128){c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15}; } // i8x16 wasm_i8x16_splat(int8_t a) @@ -58,7 +58,7 @@ static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) { // int8_t wasm_u8x16_extract_lane(u8x16 a, imm) #define wasm_u8x16_extract_lane(a, b) \ - (__builtin_wasm_extract_lane_u_i8x16((__i8x16))(a), b)) + (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(a), b)) // i8x16 wasm_i8x16_replace_lane(i8x16 a, imm i, int8_t b) #define wasm_i8x16_replace_lane(a, i, b) \ @@ -105,7 +105,7 @@ static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) { // i8x16 wasm_i64x2_replace_lane(i8x16 a, imm i, int64_t b) #define wasm_i64x2_replace_lane(a, i, b) \ - ((i64x2)__builtin_wasm_replace_lane_i64x2((__i64x1)(a), i, b)) + ((i64x2)__builtin_wasm_replace_lane_i64x2((__i64x2)(a), i, b)) // f32x4 wasm_f32x4_splat(float a) static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) { @@ -119,7 +119,7 @@ static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) { #define wasm_f32x4_replace_lane(a, i, b) \ (__builtin_wasm_replace_lane_f32x4(a, i, b)) -#ifdef __wasm_undefined_simd128__ +#ifdef __wasm_unimplemented_simd128__ // f64x2 wasm_f64x2_splat(double a) static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) { @@ -133,7 +133,7 @@ static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) { #define wasm_f64x2_replace_lane(a, i, b) \ (__builtin_wasm_replace_lane_f64x2(a, i, b)) -#endif // __wasm_undefined_simd128__ +#endif // __wasm_unimplemented_simd128__ // i8x16 wasm_i8x16_eq(i8x16 a, i8x16 b) static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_eq(i8x16 a, i8x16 b) { @@ -314,7 +314,7 @@ static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_ge(f32x4 a, f32x4 b) { return (u32x4)(a >= b); } -#ifdef __wasm_undefined_simd128__ +#ifdef __wasm_unimplemented_simd128__ // i64x2 wasm_f64x2_eq(f64x2 a, f64x2 b) static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_eq(f64x2 a, f64x2 b) { @@ -346,7 +346,7 @@ static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ge(f64x2 a, f64x2 b) { return (u64x2)(a >= b); } -#endif // __wasm_undefined_simd128__ +#endif // __wasm_unimplemented_simd128__ // v128 wasm_v128_not(v128 a) static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_not(v128 a) { @@ -633,7 +633,7 @@ static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_max(f32x4 a, f32x4 b) { return __builtin_wasm_max_f32x4(a, b); } -#ifdef __wasm_undefined_simd128__ +#ifdef __wasm_unimplemented_simd128__ // f64x2 wasm_f64x2_abs(f64x2 a) static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_abs(f64x2 a) { @@ -680,7 +680,7 @@ static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_max(f64x2 a, f64x2 b) { return __builtin_wasm_max_f64x2(a, b); } -#endif +#endif // __wasm_unimplemented_simd128__ // // f32x4 wasm_convert_f32x4_i32x4(i32x4 a) // #define wasm_convert_f32x4_i32x4(v) (__builtin_convertvector(v, f32x4)) diff --git 
a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index 659005743b3d4..dce6a417ab1ea 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -10,10 +10,10 @@ i8x16 TESTFN i8x16_load(i8x16 *ptr) { return (i8x16) wasm_v128_load(ptr); } void TESTFN i8x16_store(i8x16 *ptr, i8x16 vec) { - i8x16_store(ptr, vec); + wasm_v128_store(ptr, vec); } v128 TESTFN i32x4_const(void) { - return wasm_v128_const(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); + return wasm_v128_const(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16); } i8x16 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) { return __builtin_shufflevector(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); @@ -437,7 +437,7 @@ f32x4 TESTFN f32x4_max(f32x4 x, f32x4 y) { } #ifdef __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_abs(f64x2 vec) { - return __builtin_wasm_abs_f64x2(vec); + return wasm_f64x2_abs(vec); } #endif // __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_neg(f64x2 vec) { @@ -445,7 +445,7 @@ f64x2 TESTFN f64x2_neg(f64x2 vec) { } #ifdef __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_sqrt(f64x2 vec) { - return __builtin_wasm_sqrt_f64x2(vec); + return wasm_f64x2_sqrt(vec); } #endif // __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_add(f64x2 x, f64x2 y) { @@ -462,10 +462,10 @@ f64x2 TESTFN f64x2_div(f64x2 x, f64x2 y) { } #ifdef __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_min(f64x2 x, f64x2 y) { - return __builtin_wasm_min_f64x2(x, y); + return wasm_f64x2_min(x, y); } f64x2 TESTFN f64x2_max(f64x2 x, f64x2 y) { - return __builtin_wasm_max_f64x2(x, y); + return wasm_f64x2_max(x, y); } #endif // __wasm_unimplemented_simd128__ i32x4 TESTFN i32x4_trunc_s_f32x4_sat(f32x4 vec) { @@ -584,7 +584,7 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(i8x16_load(&vec), (__extension__(i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7})); } - expect_vec(i32x4_const(), ((v128)((i32x4){1, 2, 3, 4}))); + expect_vec(i32x4_const(), ((v128){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})); expect_vec( i8x16_shuffle_interleave_bytes( (i8x16){1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0}, From bc0bdcb5a1881d498b3aa372623e1e37746dbf0f Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 7 May 2019 17:50:23 -0700 Subject: [PATCH 04/16] Finish implementing instructions and clean up --- system/include/simd128.h | 82 +++++++++++--- tests/test_core.py | 2 +- tests/test_wasm_intrinsics_simd.c | 172 ++++++++++++++++++++++-------- 3 files changed, 195 insertions(+), 61 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 08ee960ec1ab5..c7d8308ec05a2 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -12,11 +12,16 @@ typedef int16_t i16x8 __attribute__((__vector_size__(16))); typedef uint16_t u16x8 __attribute__((__vector_size__(16))); typedef int32_t i32x4 __attribute__((__vector_size__(16))); typedef uint32_t u32x4 __attribute__((__vector_size__(16))); +typedef float f32x4 __attribute__((__vector_size__(16))); + +#ifdef __wasm_unimplemented_simd128__ + typedef int64_t i64x2 __attribute__((__vector_size__(16))); typedef uint64_t u64x2 __attribute__((__vector_size__(16))); -typedef float f32x4 __attribute__((__vector_size__(16))); typedef double f64x2 __attribute__((__vector_size__(16))); +#endif // __wasm_unimplemented_simd128__ + // Internal types typedef char __v128 __attribute__((__vector_size__(16))); typedef char __i8x16 __attribute__((__vector_size__(16))); @@ -28,14 +33,13 @@ typedef long long 
__i64x2 __attribute__((__vector_size__(16))); #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) // v128 wasm_v128_load(v128* mem) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_v128_load(i8x16* mem) { - return __extension__(i8x16)(*mem); +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(v128* mem) { + return (*mem); } // wasm_v128_store(v128 *mem, v128 a) static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(v128* mem, v128 a) { - *(i8x16*)mem = a; - return; + *mem = a; } // v128 wasm_v128_constant(...) @@ -74,10 +78,12 @@ static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) { (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(a), b)) #ifdef __wasm_unimplemented_simd128__ + // int16_t wasm_u16x8_extract_lane(u16x8 a, imm) #define wasm_u16x8_extract_lane(a, b) \ (__builtin_wasm_extract_lane_u_i16x8(a, b)) -#endif + +#endif // __wasm_unimplemented_simd128__ // i16x8 wasm_i16x8_replace_lane(i16x8 a, imm i, int16_t b) #define wasm_i16x8_replace_lane(a, i, b) \ @@ -95,11 +101,15 @@ static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) { #define wasm_i32x4_replace_lane(a, i, b) \ ((i32x4)__builtin_wasm_replace_lane_i32x4((__i32x4)(a), i, b)) +#ifdef __wasm_unimplemented_simd128__ + // i64x2 wasm_i64x2_splat(int64_t a) static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) { return (i64x2){a, a}; } +#endif // __wasm_unimplemented_simd128__ + // int64_t wasm_i64x2_extract_lane(i8x16, imm) #define wasm_i64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_i64x2(a, b)) @@ -548,6 +558,8 @@ static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_mul(i32x4 a, i32x4 b) { return a * b; } +#ifdef __wasm_unimplemented_simd128__ + // i64x2 wasm_i64x2_neg(i64x2 a) static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_neg(i64x2 a) { return -a; @@ -588,6 +600,8 @@ static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_sub(i64x2 a, i64x2 b) { return a - b; } +#endif // __wasm_unimplemented_simd128__ + // f32x4 wasm_f32x4_abs(f32x4 a) static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_abs(f32x4 a) { return __builtin_wasm_abs_f32x4(a); @@ -682,18 +696,52 @@ static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_max(f64x2 a, f64x2 b) { #endif // __wasm_unimplemented_simd128__ -// // f32x4 wasm_convert_f32x4_i32x4(i32x4 a) -// #define wasm_convert_f32x4_i32x4(v) (__builtin_convertvector(v, f32x4)) +static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_trunc_saturate_i32x4_f32x4(f32x4 a) { + return (i32x4)__builtin_wasm_trunc_saturate_s_i32x4_f32x4(a); +} + +static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_trunc_saturate_u32x4_f32x4(f32x4 a) { + return (u32x4)__builtin_wasm_trunc_saturate_u_i32x4_f32x4(a); +} + +#ifdef __wasm_unimplemented_simd128__ + +static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_trunc_saturate_i64x2_f64x2(f64x2 a) { + return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(a); +} + +static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_trunc_saturate_u64x2_f64x2(f64x2 a) { + return (u64x2)__builtin_wasm_trunc_saturate_s_i64x2_f64x2(a); +} + +#endif // __wasm_unimplemented_simd128__ + +// f32x4 wasm_convert_f32x4_i32x4(i32x4 a) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_convert_f32x4_i32x4(i32x4 v) { + return __builtin_convertvector(v, f32x4); +} -// // f32x4 wasm_convert_f32x4_u32x4(u32x4 a) -// #define wasm_convert_f32x4_u32x4(v) (__builtin_convertvector(v, f32x4)) +// f32x4 wasm_convert_f32x4_u32x4(u32x4 a) +static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_convert_f32x4_u32x4(u32x4 v) { + return __builtin_convertvector(v, 
f32x4); +} -// // f64x2 wasm_convert_f64x2_i64x2(i64x2 a) -// #define wasm_convert_f64x2_i64x2(v) (__builtin_convertvector(v, f64x2)) +#ifdef __wasm_unimplemented_simd128__ -// // f64x2 wasm_convert_f64x2_u64x2(u64x2 a) -// #define wasm_convert_f64x2_u64x2(v) (__builtin_convertvector(v, f64x2)) +// f64x2 wasm_convert_f64x2_i64x2(i64x2 a) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_convert_f64x2_i64x2(i64x2 v) { + return __builtin_convertvector(v, f64x2); +} + +// f64x2 wasm_convert_f64x2_u64x2(u64x2 a) +static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_convert_f64x2_u64x2(u64x2 v) { + return __builtin_convertvector(v, f64x2); +} + +#endif // __wasm_unimplemented_simd128__ -// not sure how this should work with variable input -// #define wasm_i8x16_shuffle(a, b) \ -// (__builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7)) +#define wasm_v8x16_shuffle(a, b, c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + ((v128)(__builtin_shufflevector((v128)(a), (v128)(b), \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15))) diff --git a/tests/test_core.py b/tests/test_core.py index 03299b1e3b772..c10f8f1661b32 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5495,7 +5495,7 @@ def test_wasm_builtin_simd(self, js_engines): @wasm_simd def test_wasm_intrinsics_simd(self, js_engines): - self.emcc_args.append('-Wpedantic') + self.emcc_args.extend(['-Wpedantic', '-Werror', '-Wall']) self.do_run(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), 'Success!', js_engines=js_engines) self.emcc_args.append('-munimplemented-simd128') diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index dce6a417ab1ea..a5635c005a21c 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -15,29 +15,33 @@ void TESTFN i8x16_store(i8x16 *ptr, i8x16 vec) { v128 TESTFN i32x4_const(void) { return wasm_v128_const(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16); } -i8x16 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) { - return __builtin_shufflevector(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); +v128 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) { + return wasm_v8x16_shuffle(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); } -i32x4 TESTFN i32x4_shuffle_reverse(i32x4 vec) { - return __builtin_shufflevector(vec, vec, 3, 2, 1, 0); +v128 TESTFN i32x4_shuffle_reverse(i32x4 vec) { + return wasm_v8x16_shuffle(vec, vec, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); } i8x16 TESTFN i8x16_splat(int32_t x) { return wasm_i8x16_splat(x); } int32_t TESTFN i8x16_extract_lane_s_first(i8x16 vec) { - return wasm_i8x16_extract_lane(vec, 0); + return wasm_i8x16_extract_lane(vec, 0); } int32_t TESTFN i8x16_extract_lane_s_last(i8x16 vec) { - return wasm_i8x16_extract_lane(vec, 15); + return wasm_i8x16_extract_lane(vec, 15); } + #ifdef __wasm_unimplemented_simd128__ + uint32_t TESTFN i8x16_extract_lane_u_first(i8x16 vec) { return wasm_u8x16_extract_lane(vec, 0); } uint32_t TESTFN i8x16_extract_lane_u_last(i8x16 vec) { return wasm_u8x16_extract_lane(vec, 15); } + #endif // __wasm_unimplemented_simd128__ + i8x16 TESTFN i8x16_replace_lane_first(i8x16 vec, int32_t val) { return wasm_i8x16_replace_lane(vec, 0, val); } @@ -53,14 +57,18 @@ int32_t TESTFN i16x8_extract_lane_s_first(i16x8 vec) { int32_t TESTFN i16x8_extract_lane_s_last(i16x8 vec) { return wasm_i16x8_extract_lane(vec, 7); } + #ifdef __wasm_unimplemented_simd128__ + int32_t TESTFN 
i16x8_extract_lane_u_first(i16x8 vec) { return wasm_u16x8_extract_lane(vec, 0); } int32_t TESTFN i16x8_extract_lane_u_last(i16x8 vec) { return wasm_u16x8_extract_lane(vec, 7); } + #endif // __wasm_unimplemented_simd128__ + i16x8 TESTFN i16x8_replace_lane_first(i16x8 vec, int32_t val) { return wasm_i16x8_replace_lane(vec, 0, val); } @@ -82,10 +90,12 @@ i32x4 TESTFN i32x4_replace_lane_first(i32x4 vec, int32_t val) { i32x4 TESTFN i32x4_replace_lane_last(i32x4 vec, int32_t val) { return wasm_i32x4_replace_lane(vec, 3, val); } + +#ifdef __wasm_unimplemented_simd128__ + i64x2 TESTFN i64x2_splat(int64_t x) { return wasm_i64x2_splat(x); } -#ifdef __wasm_unimplemented_simd128__ int64_t TESTFN i64x2_extract_lane_first(i64x2 vec) { return wasm_i64x2_extract_lane(vec, 0); } @@ -98,7 +108,9 @@ i64x2 TESTFN i64x2_replace_lane_first(i64x2 vec, int64_t val) { i64x2 TESTFN i64x2_replace_lane_last(i64x2 vec, int64_t val) { return wasm_i64x2_replace_lane(vec, 1, val); } + #endif // __wasm_unimplemented_simd128__ + f32x4 TESTFN f32x4_splat(float x) { return wasm_f32x4_splat(x); } @@ -114,7 +126,9 @@ f32x4 TESTFN f32x4_replace_lane_first(f32x4 vec, float val) { f32x4 TESTFN f32x4_replace_lane_last(f32x4 vec, float val) { return wasm_f32x4_replace_lane(vec, 3, val); } + #ifdef __wasm_unimplemented_simd128__ + f64x2 TESTFN f64x2_splat(int64_t x) { return wasm_f64x2_splat((double ) x); } @@ -130,7 +144,9 @@ f64x2 TESTFN f64x2_replace_lane_first(f64x2 vec, double val) { f64x2 TESTFN f64x2_replace_lane_last(f64x2 vec, double val) { return wasm_f64x2_replace_lane(vec, 1, val); } + #endif // __wasm_unimplemented_simd128__ + u8x16 TESTFN i8x16_eq(i8x16 x, i8x16 y) { return wasm_i8x16_eq(x, y); } @@ -239,7 +255,9 @@ u32x4 TESTFN f32x4_le(f32x4 x, f32x4 y) { u32x4 TESTFN f32x4_ge(f32x4 x, f32x4 y) { return wasm_f32x4_ge(x, y); } + #ifdef __wasm_undefined_simd128__ + u64x2 TESTFN f64x2_eq(f64x2 x, f64x2 y) { return wasm_f64x2_eq(x,y); } @@ -258,7 +276,9 @@ u64x2 TESTFN f64x2_le(f64x2 x, f64x2 y) { u64x2 TESTFN f64x2_ge(f64x2 x, f64x2 y) { return wasm_f64x2_ge(x, y); } + #endif // __wasm_undefined_simd128__ + v128 TESTFN v128_not(v128 vec) { return wasm_v128_not(vec); } @@ -379,17 +399,25 @@ i32x4 TESTFN i32x4_sub(i32x4 x, i32x4 y) { i32x4 TESTFN i32x4_mul(i32x4 x, i32x4 y) { return wasm_i32x4_mul(x, y); } + +#ifdef __wasm_unimplemented_simd128__ + + i64x2 TESTFN i64x2_neg(i64x2 vec) { return wasm_i64x2_neg(vec); } + #ifdef __wasm_unimplemented_simd128__ + bool TESTFN i64x2_any_true(i64x2 vec) { return wasm_i64x2_any_true(vec); } bool TESTFN i64x2_all_true(i64x2 vec) { return wasm_i64x2_all_true(vec); } + #endif // __wasm_unimplemented_simd128__ + i64x2 TESTFN i64x2_shl(i64x2 vec, int32_t shift) { return wasm_i64x2_shl(vec, shift); } @@ -406,17 +434,24 @@ i64x2 TESTFN i64x2_add(i64x2 x, i64x2 y) { i64x2 TESTFN i64x2_sub(i64x2 x, i64x2 y) { return wasm_i64x2_sub(x, y); } + +#endif // __wasm_unimplemented_simd128__ + f32x4 TESTFN f32x4_abs(f32x4 vec) { return wasm_f32x4_abs(vec); } f32x4 TESTFN f32x4_neg(f32x4 vec) { return wasm_f32x4_neg(vec); } + #ifdef __wasm_unimplemented_simd128__ + f32x4 TESTFN f32x4_sqrt(f32x4 vec) { return wasm_f32x4_sqrt(vec); } + #endif // __wasm_unimplemented_simd128__ + f32x4 TESTFN f32x4_add(f32x4 x, f32x4 y) { return wasm_f32x4_add(x,y); } @@ -435,19 +470,18 @@ f32x4 TESTFN f32x4_min(f32x4 x, f32x4 y) { f32x4 TESTFN f32x4_max(f32x4 x, f32x4 y) { return wasm_f32x4_max(x, y); } + #ifdef __wasm_unimplemented_simd128__ + f64x2 TESTFN f64x2_abs(f64x2 vec) { return wasm_f64x2_abs(vec); } -#endif // 
__wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_neg(f64x2 vec) { return -vec; } -#ifdef __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_sqrt(f64x2 vec) { return wasm_f64x2_sqrt(vec); } -#endif // __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_add(f64x2 x, f64x2 y) { return x + y; } @@ -460,41 +494,51 @@ f64x2 TESTFN f64x2_mul(f64x2 x, f64x2 y) { f64x2 TESTFN f64x2_div(f64x2 x, f64x2 y) { return x / y; } -#ifdef __wasm_unimplemented_simd128__ f64x2 TESTFN f64x2_min(f64x2 x, f64x2 y) { return wasm_f64x2_min(x, y); } f64x2 TESTFN f64x2_max(f64x2 x, f64x2 y) { return wasm_f64x2_max(x, y); } + #endif // __wasm_unimplemented_simd128__ + i32x4 TESTFN i32x4_trunc_s_f32x4_sat(f32x4 vec) { - return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(vec); + return wasm_trunc_saturate_i32x4_f32x4(vec); } -i32x4 TESTFN i32x4_trunc_u_f32x4_sat(f32x4 vec) { - return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(vec); +u32x4 TESTFN i32x4_trunc_u_f32x4_sat(f32x4 vec) { + return wasm_trunc_saturate_u32x4_f32x4(vec); } + #ifdef __wasm_unimplemented_simd128__ + i64x2 TESTFN i64x2_trunc_s_f64x2_sat(f64x2 vec) { - return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(vec); + return wasm_trunc_saturate_i64x2_f64x2(vec); } -i64x2 TESTFN i64x2_trunc_u_f64x2_sat(f64x2 vec) { - return __builtin_wasm_trunc_saturate_u_i64x2_f64x2(vec); +u64x2 TESTFN i64x2_trunc_u_f64x2_sat(f64x2 vec) { + return wasm_trunc_saturate_u64x2_f64x2(vec); } + #endif // __wasm_unimplemented_simd128__ + f32x4 TESTFN f32x4_convert_s_i32x4(i32x4 vec) { - return __builtin_convertvector(vec, f32x4); + return wasm_convert_f32x4_i32x4(vec); } -f32x4 TESTFN f32x4_convert_u_i32x4(i32x4 vec) { - return __builtin_convertvector((u32x4)vec, f32x4); +f32x4 TESTFN f32x4_convert_u_i32x4(u32x4 vec) { + return wasm_convert_f32x4_u32x4(vec); } + +#ifdef __wasm_unimplemented_simd128__ + f64x2 TESTFN f64x2_convert_s_i64x2(i64x2 vec) { - return __builtin_convertvector(vec, f64x2); + return wasm_convert_f64x2_i64x2(vec); } -f64x2 TESTFN f64x2_convert_u_i64x2(i64x2 vec) { - return __builtin_convertvector((u64x2)vec, f64x2); +f64x2 TESTFN f64x2_convert_u_i64x2(u64x2 vec) { + return wasm_convert_f64x2_u64x2(vec); } +#endif // __wasm_unimplemented_simd128__ + static int failures = 0; #define formatter(x) _Generic((x), \ @@ -537,6 +581,16 @@ static int failures = 0; } \ }) +#ifdef __wasm_unimplemented_simd128__ +#define UNIMPLEMENTED_TYPES \ + , \ + i64x2: 2, \ + u64x2: 2, \ + f64x2: 2 +#else +#define UNIMPLEMENTED_TYPES +#endif // __wasm_unimplemented_simd128__ + #define expect_vec(_a, _b) __extension__({ \ __typeof__(_a) a = (_a), b = (_b); \ bool err = false; \ @@ -547,10 +601,9 @@ static int failures = 0; u16x8: 8, \ i32x4: 4, \ u32x4: 4, \ - i64x2: 2, \ - u64x2: 2, \ - f32x4: 4, \ - f64x2: 2); \ + f32x4: 4 \ + UNIMPLEMENTED_TYPES \ + ); \ for (size_t i = 0; i < lanes; i++) { \ if (!eq(a[i], b[i])) { \ err = true; \ @@ -590,19 +643,23 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { (i8x16){1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0}, (i8x16){0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16} ), - ((i8x16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + ((v128)(i8x16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) ); - expect_vec(i32x4_shuffle_reverse((i32x4){1, 2, 3, 4}), ((i32x4){4, 3, 2, 1})); + expect_vec(i32x4_shuffle_reverse((i32x4){1, 2, 3, 4}), ((v128)(i32x4){4, 3, 2, 1})); // i8x16 lane accesses expect_vec(i8x16_splat(5), ((i8x16){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5})); expect_vec(i8x16_splat(257), ((i8x16){1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); expect_eq(i8x16_extract_lane_s_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), -1); expect_eq(i8x16_extract_lane_s_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), -1); + #ifdef __wasm_unimplemented_simd128__ + expect_eq(i8x16_extract_lane_u_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 255); expect_eq(i8x16_extract_lane_u_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), 255); + #endif // __wasm_unimplemented_simd128__ + expect_vec( i8x16_replace_lane_first((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i8x16){7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) @@ -617,10 +674,14 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(i16x8_splat(65537), ((i16x8){1, 1, 1, 1, 1, 1, 1, 1})); expect_eq(i16x8_extract_lane_s_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), -1); expect_eq(i16x8_extract_lane_s_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), -1); + #ifdef __wasm_unimplemented_simd128__ + expect_eq(i16x8_extract_lane_u_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), 65535); expect_eq(i16x8_extract_lane_u_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), 65535); + #endif // __wasm_unimplemented_simd128__ + expect_vec(i16x8_replace_lane_first((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){7, 0, 0, 0, 0, 0, 0, 0})); expect_vec(i16x8_replace_lane_last((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){0, 0, 0, 0, 0, 0, 0, 7})); @@ -631,13 +692,16 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(i32x4_replace_lane_first((i32x4){0, 0, 0, 0}, 53), ((i32x4){53, 0, 0, 0})); expect_vec(i32x4_replace_lane_last((i32x4){0, 0, 0, 0}, 53), ((i32x4){0, 0, 0, 53})); + +#ifdef __wasm_unimplemented_simd128__ + // i64x2 lane accesses expect_vec(i64x2_splat(-5), ((i64x2){-5, -5})); -#ifdef __wasm_unimplemented_simd128__ expect_eq(i64x2_extract_lane_first((i64x2){-5, 0}), -5); expect_eq(i64x2_extract_lane_last((i64x2){0, -5}), -5); expect_vec(i64x2_replace_lane_first((i64x2){0, 0}, 53), ((i64x2){53, 0})); expect_vec(i64x2_replace_lane_last((i64x2){0, 0}, 53), ((i64x2){0, 53})); + #endif // __wasm_unimplemented_simd128__ // f32x4 lane accesses @@ -648,12 +712,14 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(f32x4_replace_lane_last((f32x4){0, 0, 0, 0}, 53), ((f32x4){0, 0, 0, 53})); #ifdef __wasm_unimplemented_simd128__ + // f64x2 lane accesses expect_vec(f64x2_splat(-5), ((f64x2){-5, -5})); expect_eq(f64x2_extract_lane_first((f64x2){-5, 0}), -5); expect_eq(f64x2_extract_lane_last((f64x2){0, -5}), -5); expect_vec(f64x2_replace_lane_first((f64x2){0, 0}, 53), ((f64x2){53, 0})); expect_vec(f64x2_replace_lane_last((f64x2){0, 0}, 53), ((f64x2){0, 53})); + #endif // __wasm_unimplemented_simd128__ // i8x16 comparisons @@ -902,6 +968,7 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { ); #ifdef __wasm_undefined_simd128__ + // f64x2 comparisons expect_vec(f64x2_eq((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, 0})); expect_vec(f64x2_ne((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, -1})); @@ -915,6 +982,7 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(f64x2_gt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); expect_vec(f64x2_le((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, -1})); expect_vec(f64x2_ge((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); + #endif // __wasm_undefined_simd128__ // bitwise operations @@ -1135,16 +1203,17 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { 
expect_vec(i32x4_mul((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 1, 1764, 25})); +#ifdef __wasm_unimplemented_simd128__ + // i64x2 arithmetic expect_vec(i64x2_neg((i64x2){0x8000000000000000, 42}), ((i64x2){0x8000000000000000, -42})); -#ifdef __wasm_unimplemented_simd128__ expect_eq(i64x2_any_true((i64x2){0, 0}), 0); expect_eq(i64x2_any_true((i64x2){1, 0}), 1); expect_eq(i64x2_any_true((i64x2){1, 1}), 1); expect_eq(i64x2_all_true((i64x2){0, 0}), 0); expect_eq(i64x2_all_true((i64x2){1, 0}), 0); expect_eq(i64x2_all_true((i64x2){1, 1}), 1); -#endif // __wasm_unimplemented_simd128__ + expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 1), ((i64x2){2, 0})); expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0xc000000000000000})); @@ -1154,12 +1223,18 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(i64x2_add((i64x2){0x8000000000000001, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){2, 42})); expect_vec(i64x2_sub((i64x2){2, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){0x8000000000000001, 42})); +#endif // __wasm_unimplemented_simd128__ + // f32x4 arithmetic expect_vec(f32x4_abs((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, NAN, INFINITY, 5})); expect_vec(f32x4_neg((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, -NAN, INFINITY, -5})); + #ifdef __wasm_unimplemented_simd128__ + expect_vec(f32x4_sqrt((f32x4){-0., NAN, INFINITY, 4}), ((f32x4){-0., NAN, INFINITY, 2})); + #endif // __wasm_unimplemented_simd128__ + expect_vec(f32x4_add((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 43})); expect_vec(f32x4_sub((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, -INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 41})); expect_vec(f32x4_mul((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 2}), ((f32x4){NAN, -NAN, INFINITY, 84})); @@ -1167,17 +1242,18 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { // expect_vec(f32x4_min((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){-0., -0., NAN, NAN})); // expect_vec(f32x4_max((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){0, 0, NAN, NAN})); - // f64x2 arithmetic #ifdef __wasm_unimplemented_simd128__ + + // f64x2 arithmetic expect_vec(f64x2_abs((f64x2){-0., NAN}), ((f64x2){0, NAN})); expect_vec(f64x2_abs((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, 5})); -#endif // __wasm_unimplemented_simd128__ + expect_vec(f64x2_neg((f64x2){-0., NAN}), ((f64x2){0, -NAN})); expect_vec(f64x2_neg((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, -5})); -#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_sqrt((f64x2){-0., NAN}), ((f64x2){-0., NAN})); expect_vec(f64x2_sqrt((f64x2){INFINITY, 4}), ((f64x2){INFINITY, 2})); -#endif // __wasm_unimplemented_simd128__ + expect_vec(f64x2_add((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); expect_vec(f64x2_add((f64x2){INFINITY, 42}, (f64x2){INFINITY, 1}), ((f64x2){INFINITY, 43})); expect_vec(f64x2_sub((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); @@ -1186,28 +1262,38 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { expect_vec(f64x2_mul((f64x2){INFINITY, 42}, (f64x2){INFINITY, 2}), ((f64x2){INFINITY, 84})); expect_vec(f64x2_div((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); expect_vec(f64x2_div((f64x2){INFINITY, 42}, (f64x2){2, 2}), ((f64x2){INFINITY, 21})); -#ifdef __wasm_unimplemented_simd128__ + 
expect_vec(f64x2_min((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){-0., -0})); expect_vec(f64x2_min((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); expect_vec(f64x2_max((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){0, 0})); expect_vec(f64x2_max((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); + #endif // __wasm_unimplemented_simd128__ // conversions expect_vec(i32x4_trunc_s_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 2147483647, -2147483648ll})); - expect_vec(i32x4_trunc_u_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 4294967295ull, 0})); + expect_vec(i32x4_trunc_u_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((u32x4){42, 0, 4294967295ull, 0})); + #ifdef __wasm_unimplemented_simd128__ + expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){9223372036854775807ll, -9223372036854775807ll - 1})); - expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); - expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){18446744073709551615ull, 0})); + expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){42, NAN}), ((u64x2){42, 0})); + expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((u64x2){18446744073709551615ull, 0})); + #endif // __wasm_unimplemented_simd128__ + expect_vec(f32x4_convert_s_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, -1, 2147483648., -2147483648.})); - expect_vec(f32x4_convert_u_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, 4294967296., 2147483648., 2147483648.})); + expect_vec(f32x4_convert_u_i32x4((u32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, 4294967296., 2147483648., 2147483648.})); + +#ifdef __wasm_unimplemented_simd128__ + expect_vec(f64x2_convert_s_i64x2((i64x2){0, -1}), ((f64x2){0, -1})); expect_vec(f64x2_convert_s_i64x2((i64x2){9223372036854775807, -9223372036854775807 - 1}), ((f64x2){9223372036854775807., -9223372036854775808.})); - expect_vec(f64x2_convert_u_i64x2((i64x2){0, -1}), ((f64x2){0, 18446744073709551616.})); - expect_vec(f64x2_convert_u_i64x2((i64x2){9223372036854775807 , -9223372036854775808.}), ((f64x2){9223372036854775807., 9223372036854775808.})); + expect_vec(f64x2_convert_u_i64x2((u64x2){0, -1}), ((f64x2){0, 18446744073709551616.})); + expect_vec(f64x2_convert_u_i64x2((u64x2)(i64x2){9223372036854775807 , -9223372036854775808.}), ((f64x2){9223372036854775807., 9223372036854775808.})); + +#endif // __wasm_unimplemented_simd128__ if (failures == 0) { printf("Success!\n"); From e5b270483c14f8bbac71b027332d7a10842890a7 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 9 May 2019 11:24:06 -0700 Subject: [PATCH 05/16] Add explicit alignments and make load and store unaligned --- system/include/simd128.h | 62 ++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index c7d8308ec05a2..550e777bb56b6 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -5,41 +5,47 @@ WebAssembly SIMD128 Intrinsics #include // User-facing types -typedef int8_t v128 __attribute__((__vector_size__(16))); -typedef int8_t i8x16 __attribute__((__vector_size__(16))); -typedef uint8_t u8x16 __attribute__((__vector_size__(16))); -typedef int16_t i16x8 __attribute__((__vector_size__(16))); -typedef uint16_t u16x8 __attribute__((__vector_size__(16))); -typedef int32_t i32x4 __attribute__((__vector_size__(16))); -typedef uint32_t 
u32x4 __attribute__((__vector_size__(16))); -typedef float f32x4 __attribute__((__vector_size__(16))); +typedef int8_t v128 __attribute__((__vector_size__(16), __aligned__(16))); +typedef int8_t i8x16 __attribute__((__vector_size__(16), __aligned__(16))); +typedef uint8_t u8x16 __attribute__((__vector_size__(16), __aligned__(16))); +typedef int16_t i16x8 __attribute__((__vector_size__(16), __aligned__(16))); +typedef uint16_t u16x8 __attribute__((__vector_size__(16), __aligned__(16))); +typedef int32_t i32x4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef uint32_t u32x4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef float f32x4 __attribute__((__vector_size__(16), __aligned__(16))); #ifdef __wasm_unimplemented_simd128__ -typedef int64_t i64x2 __attribute__((__vector_size__(16))); -typedef uint64_t u64x2 __attribute__((__vector_size__(16))); -typedef double f64x2 __attribute__((__vector_size__(16))); +typedef int64_t i64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef uint64_t u64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef double f64x2 __attribute__((__vector_size__(16), __aligned__(16))); #endif // __wasm_unimplemented_simd128__ // Internal types -typedef char __v128 __attribute__((__vector_size__(16))); -typedef char __i8x16 __attribute__((__vector_size__(16))); -typedef short __i16x8 __attribute__((__vector_size__(16))); -typedef int __i32x4 __attribute__((__vector_size__(16))); -typedef long long __i64x2 __attribute__((__vector_size__(16))); - - -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) - -// v128 wasm_v128_load(v128* mem) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(v128* mem) { - return (*mem); -} - -// wasm_v128_store(v128 *mem, v128 a) -static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(v128* mem, v128 a) { - *mem = a; +typedef int8_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); +typedef char __v128 __attribute__((__vector_size__(16), __aligned__(16))); +typedef char __i8x16 __attribute__((__vector_size__(16), __aligned__(16))); +typedef short __i16x8 __attribute__((__vector_size__(16), __aligned__(16))); +typedef int __i32x4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16))); + +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("simd128"), __min_vector_width__(128))) + +// v128 wasm_v128_load(void* mem) +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(void* __mem) { + struct __wasm_v128_load_struct { + __v128_u __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __wasm_v128_load_struct*)__mem)->__v; +} + +// wasm_v128_store(void* mem, v128 a) +static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128 __a) { + struct __wasm_v128_store_struct { + __v128_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __wasm_v128_store_struct*)__mem)->__v = __a; } // v128 wasm_v128_constant(...) 
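A note on the load/store rewrite in the hunk above: wrapping the 1-byte-aligned __v128_u vector in a packed, may_alias struct is what lets wasm_v128_load and wasm_v128_store be used on pointers that are not 16-byte aligned, and it is essentially the same idiom Clang's x86 intrinsics headers use for their unaligned load/store wrappers. A minimal standalone sketch of the trick follows; the names vec_t, vec_u_t, load_unaligned, and the test buffer are illustrative only and are not part of simd128.h:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t vec_t   __attribute__((__vector_size__(16), __aligned__(16)));
    typedef int32_t vec_u_t __attribute__((__vector_size__(16), __aligned__(1)));

    static inline vec_t load_unaligned(const void *mem) {
      // The packed struct drops the member's alignment requirement to 1, and
      // may_alias keeps the access valid whatever type actually lives at *mem.
      struct wrapper { vec_u_t v; } __attribute__((__packed__, __may_alias__));
      return ((const struct wrapper *)mem)->v;
    }

    int main(void) {
      int32_t buf[5] = {0, 1, 2, 3, 4};
      vec_t v = load_unaligned(&buf[1]);   // &buf[1] is only 4-byte aligned
      printf("%d %d %d %d\n", v[0], v[1], v[2], v[3]);  // prints: 1 2 3 4
      return 0;
    }

Without the __aligned__(1) member the compiler would be entitled to assume a 16-byte-aligned address and emit an aligned access, which is why the explicit alignments on the public typedefs and the unaligned internal type are introduced together in this patch.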
From 7ddcfaa9447d386aa94fd20ded0d936a23b5188b Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 13 May 2019 09:30:37 -0700 Subject: [PATCH 06/16] Add const to loaded pointer --- system/include/simd128.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 550e777bb56b6..61a7fb7ec4a96 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -33,11 +33,11 @@ typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16))); #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("simd128"), __min_vector_width__(128))) // v128 wasm_v128_load(void* mem) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(void* __mem) { +static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { struct __wasm_v128_load_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __wasm_v128_load_struct*)__mem)->__v; + return ((const struct __wasm_v128_load_struct*)__mem)->__v; } // wasm_v128_store(void* mem, v128 a) From b7a32a5c29aec106e3334e0e05c5e26dcde7d616 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 6 Jun 2019 17:33:59 -0700 Subject: [PATCH 07/16] Rewrite intrinsics to expose only v128_t --- system/include/simd128.h | 834 ++++++++++---------- tests/test_core.py | 10 +- tests/test_wasm_intrinsics_simd.c | 1213 ++++++++++++++++------------- 3 files changed, 1103 insertions(+), 954 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 61a7fb7ec4a96..13f1fc1f1bb16 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -4,36 +4,26 @@ WebAssembly SIMD128 Intrinsics #include -// User-facing types -typedef int8_t v128 __attribute__((__vector_size__(16), __aligned__(16))); -typedef int8_t i8x16 __attribute__((__vector_size__(16), __aligned__(16))); -typedef uint8_t u8x16 __attribute__((__vector_size__(16), __aligned__(16))); -typedef int16_t i16x8 __attribute__((__vector_size__(16), __aligned__(16))); -typedef uint16_t u16x8 __attribute__((__vector_size__(16), __aligned__(16))); -typedef int32_t i32x4 __attribute__((__vector_size__(16), __aligned__(16))); -typedef uint32_t u32x4 __attribute__((__vector_size__(16), __aligned__(16))); -typedef float f32x4 __attribute__((__vector_size__(16), __aligned__(16))); +// User-facing type +typedef int32_t v128_t __attribute__((vector_size(16), __aligned__(16))); -#ifdef __wasm_unimplemented_simd128__ - -typedef int64_t i64x2 __attribute__((__vector_size__(16), __aligned__(16))); -typedef uint64_t u64x2 __attribute__((__vector_size__(16), __aligned__(16))); -typedef double f64x2 __attribute__((__vector_size__(16), __aligned__(16))); - -#endif // __wasm_unimplemented_simd128__ - -// Internal types -typedef int8_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); -typedef char __v128 __attribute__((__vector_size__(16), __aligned__(16))); +// Internal types determined by clang builtin definitions +typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); typedef char __i8x16 __attribute__((__vector_size__(16), __aligned__(16))); +typedef unsigned char __u8x16 __attribute__((__vector_size__(16), __aligned__(16))); typedef short __i16x8 __attribute__((__vector_size__(16), __aligned__(16))); +typedef unsigned short __u16x8 __attribute__((__vector_size__(16), __aligned__(16))); typedef int __i32x4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef unsigned int __u32x4 
__attribute__((__vector_size__(16), __aligned__(16))); typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef unsigned long long __u64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("simd128"), __min_vector_width__(128))) // v128 wasm_v128_load(void* mem) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { struct __wasm_v128_load_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); @@ -41,7 +31,7 @@ static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { } // wasm_v128_store(void* mem, v128 a) -static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128 __a) { +static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128_t __a) { struct __wasm_v128_store_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); @@ -49,705 +39,691 @@ static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128 __a) } // v128 wasm_v128_constant(...) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c0, - int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, - int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, - int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { - return (v128){c0, c1, c2, c3, c4, c5, c6, c7, - c8, c9, c10, c11, c12, c13, c14, c15}; +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c0, int8_t c1, int8_t c2, + int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, + int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + return (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15}; } -// i8x16 wasm_i8x16_splat(int8_t a) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) { - return (i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a}; +// v128_t wasm_i8x16_splat(int8_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) { + return (v128_t)(__i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a}; } -// int8_t wasm_i8x16_extract_lane(i8x16 a, imm) -#define wasm_i8x16_extract_lane(a, b) \ - (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(a), b)) +// int8_t wasm_i8x16_extract_lane(v128_t a, imm i) +#define wasm_i8x16_extract_lane(a, i) (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(a), i)) -// int8_t wasm_u8x16_extract_lane(u8x16 a, imm) -#define wasm_u8x16_extract_lane(a, b) \ - (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(a), b)) +// int8_t wasm_u8x16_extract_lane(v128_t a, imm i) +#define wasm_u8x16_extract_lane(a, i) (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(a), i)) -// i8x16 wasm_i8x16_replace_lane(i8x16 a, imm i, int8_t b) -#define wasm_i8x16_replace_lane(a, i, b) \ - ((i8x16)__builtin_wasm_replace_lane_i8x16((__i8x16)(a), i, b)) +// v128_t wasm_i8x16_replace_lane(v128_t a, imm i, int8_t b) +#define wasm_i8x16_replace_lane(a, i, b) \ + ((v128_t)__builtin_wasm_replace_lane_i8x16((__i8x16)(a), i, b)) -// i16x8 wasm_i16x8_splat(int16_t a) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) { - return (i16x8){a, a, a, a, a, a, a, a}; +// v128_t wasm_i16x8_splat(int16_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i16x8_splat(int16_t a) { + return (v128_t)(__i16x8){a, a, a, a, a, a, a, a}; } -// int16_t wasm_i16x8_extract_lane(i16x8 a, imm) -#define wasm_i16x8_extract_lane(a, b) \ - (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(a), b)) +// int16_t wasm_i16x8_extract_lane(v128_t a, imm i) +#define wasm_i16x8_extract_lane(a, i) (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(a), i)) #ifdef __wasm_unimplemented_simd128__ -// int16_t wasm_u16x8_extract_lane(u16x8 a, imm) -#define wasm_u16x8_extract_lane(a, b) \ - (__builtin_wasm_extract_lane_u_i16x8(a, b)) +// int16_t wasm_u16x8_extract_lane(v128_t a, imm i) +#define wasm_u16x8_extract_lane(a, i) (__builtin_wasm_extract_lane_u_i16x8((__i16x8)(a), i)) #endif // __wasm_unimplemented_simd128__ -// i16x8 wasm_i16x8_replace_lane(i16x8 a, imm i, int16_t b) -#define wasm_i16x8_replace_lane(a, i, b) \ - ((i16x8)__builtin_wasm_replace_lane_i16x8((__i16x8)(a), i, b)) +// v128_t wasm_i16x8_replace_lane(v128_t a, imm i, int16_t b) +#define wasm_i16x8_replace_lane(a, i, b) \ + ((v128_t)__builtin_wasm_replace_lane_i16x8((__i16x8)(a), i, b)) -// i32x4 wasm_i32x4_splat(int32_t a) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) { - return (i32x4){a, a, a, a}; +// v128_t wasm_i32x4_splat(int32_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) { + return (v128_t)(__i32x4){a, a, a, a}; } -// int32_t wasm_i32x4_extract_lane(i32x4 a, imm) -#define wasm_i32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_i32x4(a, b)) +// int32_t wasm_i32x4_extract_lane(v128_t a, imm i) +#define wasm_i32x4_extract_lane(a, i) (__builtin_wasm_extract_lane_i32x4((__i32x4)(a), i)) -// i32x4 wasm_i32x4_replace_lane(i32x4 a, imm i, int32_t b) -#define wasm_i32x4_replace_lane(a, i, b) \ - ((i32x4)__builtin_wasm_replace_lane_i32x4((__i32x4)(a), i, b)) +// v128_t wasm_i32x4_replace_lane(v128_t a, imm i, int32_t b) +#define wasm_i32x4_replace_lane(a, i, b) \ + ((v128_t)__builtin_wasm_replace_lane_i32x4((__i32x4)(a), i, b)) #ifdef __wasm_unimplemented_simd128__ -// i64x2 wasm_i64x2_splat(int64_t a) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) { - return (i64x2){a, a}; +// v128_t wasm_i64x2_splat(int64_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) { + return (v128_t)(__i64x2){a, a}; } #endif // __wasm_unimplemented_simd128__ -// int64_t wasm_i64x2_extract_lane(i8x16, imm) -#define wasm_i64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_i64x2(a, b)) +// int64_t wasm_i64x2_extract_lane(v128_t a, imm i) +#define wasm_i64x2_extract_lane(a, i) (__builtin_wasm_extract_lane_i64x2((__i64x2)(a), i)) -// i8x16 wasm_i64x2_replace_lane(i8x16 a, imm i, int64_t b) -#define wasm_i64x2_replace_lane(a, i, b) \ - ((i64x2)__builtin_wasm_replace_lane_i64x2((__i64x2)(a), i, b)) +// v128_t wasm_i64x2_replace_lane(v128_t a, imm i, int64_t b) +#define wasm_i64x2_replace_lane(a, i, b) \ + ((v128_t)__builtin_wasm_replace_lane_i64x2((__i64x2)(a), i, b)) -// f32x4 wasm_f32x4_splat(float a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) { - return (f32x4){a, a, a, a}; +// v128_t wasm_f32x4_splat(float a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) { + return (v128_t)(__f32x4){a, a, a, a}; } -// float wasm_f32x4_extract_lane(f32x4, imm) -#define wasm_f32x4_extract_lane(a, b) (__builtin_wasm_extract_lane_f32x4(a, b)) +// float wasm_f32x4_extract_lane(v128_t a, imm i) +#define wasm_f32x4_extract_lane(a, i) (__builtin_wasm_extract_lane_f32x4((__f32x4)(a), i)) -// f32x4 
wasm_f32x4_replace_lane(f32x4 a, imm i, float b) -#define wasm_f32x4_replace_lane(a, i, b) \ - (__builtin_wasm_replace_lane_f32x4(a, i, b)) +// v128_t wasm_f32x4_replace_lane(v128_t a, imm i, float b) +#define wasm_f32x4_replace_lane(a, i, b) \ + ((v128_t)__builtin_wasm_replace_lane_f32x4((__f32x4)(a), i, b)) #ifdef __wasm_unimplemented_simd128__ -// f64x2 wasm_f64x2_splat(double a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) { - return (f64x2){a, a}; +// v128_t wasm_f64x2_splat(double a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) { + return (v128_t)(__f64x2){a, a}; } -// double __builtin_wasm_extract_lane_f64x2(f64x2, imm) -#define wasm_f64x2_extract_lane(a, b) (__builtin_wasm_extract_lane_f64x2(a, b)) +// double __builtin_wasm_extract_lane_f64x2(v128_t a, imm i) +#define wasm_f64x2_extract_lane(a, i) (__builtin_wasm_extract_lane_f64x2((__f64x2)(a), i)) -// f64x2 wasm_f64x4_replace_lane(f64x2 a, imm i, double b) -#define wasm_f64x2_replace_lane(a, i, b) \ - (__builtin_wasm_replace_lane_f64x2(a, i, b)) +// v128_t wasm_f64x4_replace_lane(v128_t a, imm i, double b) +#define wasm_f64x2_replace_lane(a, i, b) \ + ((v128_t)__builtin_wasm_replace_lane_f64x2((__f64x2)(a), i, b)) #endif // __wasm_unimplemented_simd128__ -// i8x16 wasm_i8x16_eq(i8x16 a, i8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_eq(i8x16 a, i8x16 b) { - return (u8x16)(a == b); +// v128_t wasm_i8x16_eq(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a == (__i8x16)b); } -// i8x16 wasm_i8x16_ne(i8x16 a, i8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_ne(i8x16 a, i8x16 b) { - return (u8x16)(a != b); +// v128_t wasm_i8x16_ne(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ne(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a != (__i8x16)b); } -// i8x16 wasm_i8x16_lt(i8x16 a, i8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_lt(i8x16 a, i8x16 b) { - return (u8x16)(a < b); +// v128_t wasm_i8x16_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a < (__i8x16)b); } -// i8x16 wasm_u8x16_lt(u8x16 a, u8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_lt(u8x16 a, u8x16 b) { - return (u8x16)(a < b); +// v128_t wasm_u8x16_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t a, v128_t b) { + return (v128_t)((__u8x16)a < (__u8x16)b); } -// i8x16 wasm_i8x16_gt(i8x16 a, i8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_gt(i8x16 a, i8x16 b) { - return (u8x16)(a > b); +// v128_t wasm_i8x16_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a > (__i8x16)b); } -// i8x16 wasm_u8x16_gt(u8x16 a, u8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_gt(u8x16 a, u8x16 b) { - return (u8x16)(a > b); +// v128_t wasm_u8x16_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t a, v128_t b) { + return (v128_t)((__u8x16)a > (__u8x16)b); } -// i8x16 wasm_i8x16_le(i8x16 a, i8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_le(i8x16 a, i8x16 b) { - return (u8x16)(a <= b); +// v128_t wasm_i8x16_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a <= (__i8x16)b); } -// i8x16 wasm_i8x16_le(u8x16 a, u8x16 b) 
-static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_le(u8x16 a, u8x16 b) { - return (u8x16)(a <= b); +// v128_t wasm_i8x16_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t a, v128_t b) { + return (v128_t)((__u8x16)a <= (__u8x16)b); } -// i8x16 wasm_i8x16_ge(i8x16 a, i8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_i8x16_ge(i8x16 a, i8x16 b) { - return (u8x16)(a >= b); +// v128_t wasm_i8x16_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a >= (__i8x16)b); } -// i8x16 wasm_u8x16_ge(u8x16 a, u8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_ge(u8x16 a, u8x16 b) { - return (u8x16)(a >= b); +// v128_t wasm_u8x16_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t a, v128_t b) { + return (v128_t)((__u8x16)a >= (__u8x16)b); } -// i16x8 wasm_i16x8_eq(i16x8 a, i16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_eq(i16x8 a, i16x8 b) { - return (u16x8)(a == b); +// v128_t wasm_i16x8_eq(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_eq(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a == (__i16x8)b); } -// i16x8 wasm_i16x8_ne(i16x8 a, i32x4 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_ne(i16x8 a, i16x8 b) { - return (u16x8)(a != b); +// v128_t wasm_i16x8_ne(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ne(v128_t a, v128_t b) { + return (v128_t)((__u16x8)a != (__u16x8)b); } -// i16x8 wasm_i16x8_lt(i16x8 a, i16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_lt(i16x8 a, i16x8 b) { - return (u16x8)(a < b); + +// v128_t wasm_i16x8_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_lt(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a < (__i16x8)b); } -// i16x8 wasm_u16x8_lt(u8x16 a, u8x16 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_lt(u16x8 a, u16x8 b) { - return (u16x8)(a < b); +// v128_t wasm_u16x8_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_lt(v128_t a, v128_t b) { + return (v128_t)((__u16x8)a < (__u16x8)b); } -// i16x8 wasm_i16x8_gt(i16x8 a, i16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_gt(i16x8 a, i16x8 b) { - return (u16x8)(a > b); +// v128_t wasm_i16x8_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_gt(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a > (__i16x8)b); } -// i16x8 wasm_u16x8_gt(u8x16 a, u8x16 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_gt(u16x8 a, u16x8 b) { - return (u16x8)(a > b); +// v128_t wasm_u16x8_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_gt(v128_t a, v128_t b) { + return (v128_t)((__u16x8)a > (__u16x8)b); } -// i16x8 wasm_i16x8_le(i16x8 a, i16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_i16x8_le(i16x8 a, i16x8 b) { - return (u16x8)(a <= b); +// v128_t wasm_i16x8_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_le(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a <= (__i16x8)b); } -// i16x8 wasm_i16x8_le(u8x16 a, u8x16 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_le(u16x8 a, u16x8 b) { - return (u16x8)(a <= b); +// v128_t wasm_i16x8_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_le(v128_t a, v128_t b) { + return (v128_t)((__u16x8)a <= (__u16x8)b); } -// i16x8 wasm_i16x8_ge(i16x8 a, i16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS 
wasm_i16x8_ge(i16x8 a, i16x8 b) { - return (u16x8)(a >= b); +// v128_t wasm_i16x8_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ge(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a >= (__i16x8)b); } -// i16x8 wasm_i16x8_ge(u16x8 a, u16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_ge(u16x8 a, u16x8 b) { - return (u16x8)(a >= b); +// v128_t wasm_i16x8_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_ge(v128_t a, v128_t b) { + return (v128_t)((__u16x8)a >= (__u16x8)b); } -// i32x4 wasm_i32x4_eq(i32x4 a, i32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_eq(i32x4 a, i32x4 b) { - return (u32x4)(a == b); +// v128_t wasm_i32x4_eq(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_eq(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a == (__i32x4)b); } -// i32x4 wasm_i32x4_ne(i32x4 a, i32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_ne(i32x4 a, i32x4 b) { - return (u32x4)(a != b); +// v128_t wasm_i32x4_ne(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ne(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a != (__i32x4)b); } -// i32x4 wasm_i32x4_lt(i32x4 a, i32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_lt(i32x4 a, i32x4 b) { - return (u32x4)(a < b); +// v128_t wasm_i32x4_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_lt(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a < (__i32x4)b); } -// u32x4 wasm_u32x4_lt(u32x4 a, u32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_lt(u32x4 a, u32x4 b) { - return (u32x4)(a < b); +// v128_t wasm_u32x4_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_lt(v128_t a, v128_t b) { + return (v128_t)((__u32x4)a < (__u32x4)b); } -// i32x4 wasm_i32x4_gt(i32x4 a, i32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_gt(i32x4 a, i32x4 b) { - return (u32x4)(a > b); +// v128_t wasm_i32x4_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_gt(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a > (__i32x4)b); } -// i32x4 wasm_i32x4_gt(u32x4 a, u32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_gt(u32x4 a, u32x4 b) { - return (u32x4)(a > b); +// v128_t wasm_i32x4_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_gt(v128_t a, v128_t b) { + return (v128_t)((__u32x4)a > (__u32x4)b); } -// i32x4 wasm_i32x4_le(i32x4 a, i32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_le(i32x4 a, i32x4 b) { - return (u32x4)(a <= b); +// v128_t wasm_i32x4_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_le(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a <= (__i32x4)b); } -// i32x4 wasm_u32x4_le(u32x4 a, u32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_le(u32x4 a, u32x4 b) { - return (u32x4)(a <= b); +// v128_t wasm_u32x4_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_le(v128_t a, v128_t b) { + return (v128_t)((__u32x4)a <= (__u32x4)b); } -// i32x4 wasm_i32x4_ge(i32x4 a, i32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_i32x4_ge(i32x4 a, i32x4 b) { - return (u32x4)(a >= b); +// v128_t wasm_i32x4_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ge(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a >= (__i32x4)b); } -// i32x4 wasm_u32x4_ge(u32x4 a, u32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_ge(u32x4 a, u32x4 b) { - return (u32x4)(a 
>= b); +// v128_t wasm_u32x4_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_ge(v128_t a, v128_t b) { + return (v128_t)((__u32x4)a >= (__u32x4)b); } -// i32x4 wasm_f32x4_eq(f32x4 a f32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_eq(f32x4 a, f32x4 b) { - return (u32x4)(a == b); +// v128_t wasm_f32x4_eq(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_eq(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a == (__f32x4)b); } -// i32x4 wasm_f32x4_ne(f32x4 a, f32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_ne(f32x4 a, f32x4 b) { - return (u32x4)(a != b); +// v128_t wasm_f32x4_ne(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ne(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a != (__f32x4)b); } -// i32x4 wasm_f32x4_lt(f32x4 a, f32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_lt(f32x4 a, f32x4 b) { - return (u32x4)(a < b); +// v128_t wasm_f32x4_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_lt(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a < (__f32x4)b); } -// f32x4 wasm_f32x4_gt(f32x4 a, f32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_gt(f32x4 a, f32x4 b) { - return (u32x4)(a > b); +// v128_t wasm_f32x4_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_gt(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a > (__f32x4)b); } -// i32x4 wasm_f32x4_le(f32x4 a, f32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_le(f32x4 a, f32x4 b) { - return (u32x4)(a <= b); +// v128_t wasm_f32x4_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_le(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a <= (__f32x4)b); } -// i32x4 wasm_f32x4_ge(f32x4 a, f32x4 b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_f32x4_ge(f32x4 a, f32x4 b) { - return (u32x4)(a >= b); +// v128_t wasm_f32x4_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ge(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a >= (__f32x4)b); } #ifdef __wasm_unimplemented_simd128__ -// i64x2 wasm_f64x2_eq(f64x2 a, f64x2 b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_eq(f64x2 a, f64x2 b) { - return (u64x2)(a == b); +// v128_t wasm_f64x2_eq(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_eq(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a == (__f64x2)b); } -// i64x2 wasm_f64x2_ne(f64x2 a, f64x2 b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ne(f64x2 a, f64x2 b) { - return (u64x2)(a != b); +// v128_t wasm_f64x2_ne(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ne(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a != (__f64x2)b); } -// i64x2 wasm_f64x2_lt(f64x2 a, f64x2 b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_lt(f64x2 a, f64x2 b) { - return (u64x2)(a < b); +// v128_t wasm_f64x2_lt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_lt(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a < (__f64x2)b); } -// i64x2 wasm_f64x2_gt(f64x2 a, f64x2 b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_gt(f64x2 a, f64x2 b) { - return (u64x2)(a > b); +// v128_t wasm_f64x2_gt(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_gt(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a > (__f64x2)b); } -// i64x2 wasm_f64x2_le(f64x2 a, f64x2 b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_le(f64x2 a, f64x2 b) { - return (u64x2)(a <= b); +// 
v128_t wasm_f64x2_le(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_le(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a <= (__f64x2)b); } -// i64x2 wasm_f64x2_ge(f64x2 a, f64x2 b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_f64x2_ge(f64x2 a, f64x2 b) { - return (u64x2)(a >= b); +// v128_t wasm_f64x2_ge(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ge(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a >= (__f64x2)b); } #endif // __wasm_unimplemented_simd128__ -// v128 wasm_v128_not(v128 a) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_not(v128 a) { - return ~a; -} +// v128_t wasm_v128_not(v128 a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_not(v128_t a) { return ~a; } -// v128 wasm_v128_and(v128 a, v128 b) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_and(v128 a, v128 b) { - return a & b; -} +// v128_t wasm_v128_and(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_and(v128_t a, v128_t b) { return a & b; } -// v128 wasm_v128_or(v128 a, v128 b) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_or(v128 a, v128 b) { - return a | b; -} +// v128_t wasm_v128_or(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_or(v128_t a, v128_t b) { return a | b; } -// v128 wasm_v128_xor(v128 a, v128 b) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_xor(v128 a, v128 b) { - return a ^ b; -} +// v128_t wasm_v128_xor(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_xor(v128_t a, v128_t b) { return a ^ b; } -// v128 wasm_v128_bitselect(v128 a, v128 b, v128 c) -static __inline__ v128 __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128 a, v128 b, v128 c) { - return (v128)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b, (__i32x4)c); +// v128_t wasm_v128_bitselect(v128_t a, v128_t b, v128_t c) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t a, v128_t b, v128_t c) { + return (v128_t)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b, (__i32x4)c); } -// i8x16 wasm_i8x16_neg(i8x16 a) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_neg(i8x16 a) { - return -a; +// v128_t wasm_i8x16_neg(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_neg(v128_t a) { + return (v128_t)(-(__i8x16)a); } -// bool wasm_i8x16_any_true(i8x16 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_any_true(i8x16 a) { +// bool wasm_i8x16_any_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_any_true(v128_t a) { return __builtin_wasm_any_true_i8x16((__i8x16)a); } -// bool wasm_i8x16_all_true(i8x16 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(i8x16 a) { +// bool wasm_i8x16_all_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t a) { return __builtin_wasm_all_true_i8x16((__i8x16)a); } -// i8x16 wasm_i8x16_shl(i8x16 a, int32_t b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shl(i8x16 a, int32_t b) { - return a << b; +// v128_t wasm_i8x16_shl(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t a, int32_t b) { + return (v128_t)((__i8x16)a << b); } -// i8x16 wasm_i8x64_shr(i8x16 a, int32_t b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_shr(i8x16 a, int32_t b) { - return a >> b; +// v128_t wasm_i8x64_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t a, int32_t b) { + return (v128_t)((__i8x16)a >> b); } -// u8x16 wasm_u8x16_shr(u8x16 a int32_t b) -static __inline__ u8x16 
__DEFAULT_FN_ATTRS wasm_u8x16_shr(u8x16 a, int32_t b) { - return a >> b; +// v128_t wasm_u8x16_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t a, int32_t b) { + return (v128_t)((__u8x16)a >> b); } -// i8x16 wasm_i8x16_add(i8x16 a i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_add(i8x16 a, i8x16 b) { - return a + b; +// v128_t wasm_i8x16_add(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t a, v128_t b) { + return (v128_t)((__u8x16)a + (__u8x16)b); } -// i8x16 wasm_add_saturate(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_add_saturate(i8x16 a, i8x16 b) { - return (i8x16) __builtin_wasm_add_saturate_s_i8x16((__i8x16)a, (__i8x16)b); +// v128_t wasm_add_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_add_saturate_s_i8x16((__i8x16)a, (__i8x16)b); } -// u8x16 wasm_add_saturate(u8x16 a, u8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_add_saturate(u8x16 a, u8x16 b) { - return (u8x16)__builtin_wasm_add_saturate_u_i8x16((__i8x16)a, (__i8x16)b); +// v128_t wasm_add_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_add_saturate_u_i8x16((__i8x16)a, (__i8x16)b); } -// i8x16 wasm_i8x16_sub(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_sub(i8x16 a, i8x16 b) { - return a - b; +// v128_t wasm_i8x16_sub(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t a, v128_t b) { + return (v128_t)((__u8x16)a - (__u8x16)b); } -// i8x16 wasm_sub_saturate(i8x16 a, i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_sub_saturate(i8x16 a, i8x16 b) { - return (i8x16)__builtin_wasm_sub_saturate_s_i8x16((__i8x16)a, (__i8x16)b); +// v128_t wasm_sub_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_sub_saturate_s_i8x16((__i8x16)a, (__i8x16)b); } -// u8x16 wasm_sub_saturate(u8x16 a, u8x16 b) -static __inline__ u8x16 __DEFAULT_FN_ATTRS wasm_u8x16_sub_saturate(u8x16 a, u8x16 b) { - return (u8x16)__builtin_wasm_sub_saturate_u_i8x16((__i8x16)a, (__i8x16)b); +// v128_t wasm_sub_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_sub_saturate_u_i8x16((__i8x16)a, (__i8x16)b); } -// i8x16 wasm_i8x16_mul(i8x16 a i8x16 b) -static __inline__ i8x16 __DEFAULT_FN_ATTRS wasm_i8x16_mul(i8x16 a, i8x16 b) { - return a * b; +// v128_t wasm_i8x16_mul(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_mul(v128_t a, v128_t b) { + return (v128_t)((__i8x16)a * (__i8x16)b); } -// i16x8 wasm_i16x8_neg(i16x8 a) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_neg(i16x8 a) { - return -a; +// v128_t wasm_i16x8_neg(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_neg(v128_t a) { + return (v128_t)(-(__i16x8)a); } -// bool wasm_i16x8_any_true(i16x8 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_any_true(i16x8 a) { +// bool wasm_i16x8_any_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_any_true(v128_t a) { return __builtin_wasm_any_true_i16x8((__i16x8)a); } -// bool wasm_i16x8_all_true(i16x8 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(i16x8 a) { +// 
bool wasm_i16x8_all_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t a) { return __builtin_wasm_all_true_i16x8((__i16x8)a); } -// i16x8 wasm_i16x8_shl(i16x8 a, int32_t b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shl(i16x8 a, int32_t b) { - return a << b; +// v128_t wasm_i16x8_shl(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t a, int32_t b) { + return (v128_t)((__i16x8)a << b); } -// i16x8 wasm_i16x8_shr(i16x8 a, int32_t b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_shr(i16x8 a, int32_t b) { - return a >> b; +// v128_t wasm_i16x8_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t a, int32_t b) { + return (v128_t)((__i16x8)a >> b); } -// u16x8 wasm_u16x8_shr(u16x8 a, int32_t b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_shr(u16x8 a, int32_t b) { - return a >> b; +// v128_t wasm_u16x8_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t a, int32_t b) { + return (v128_t)((__u16x8)a >> b); } -// i16x8 wasm_i16x8_add(i16x8 a i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_add(i16x8 a, i16x8 b) { - return a + b; +// v128_t wasm_i16x8_add(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t a, v128_t b) { + return (v128_t)((__u16x8)a + (__u16x8)b); } -// i16x8 wasm_add_saturate(i16x8 a, i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_add_saturate(i16x8 a, i16x8 b) { - return (i16x8) __builtin_wasm_add_saturate_s_i16x8((__i16x8)a, (__i16x8)b); +// v128_t wasm_add_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_add_saturate_s_i16x8((__i16x8)a, (__i16x8)b); } -// u16x8 wasm_add_saturate(u16x8 a, u16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_add_saturate(u16x8 a, u16x8 b) { - return (u16x8)__builtin_wasm_add_saturate_u_i16x8((__i16x8)a, (__i16x8)b); +// v128_t wasm_add_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_add_saturate_u_i16x8((__i16x8)a, (__i16x8)b); } -// i16x8 wasm_i16x8_sub(i16x8 a i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_sub(i16x8 a, i16x8 b) { - return a - b; +// v128_t wasm_i16x8_sub(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a - (__i16x8)b); } -// i16x8 wasm_sub_saturate(i16x8 a, i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_sub_saturate(i16x8 a, i16x8 b) { - return (i16x8)__builtin_wasm_sub_saturate_s_i16x8((__i16x8)a, (__i16x8)b); +// v128_t wasm_sub_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_sub_saturate_s_i16x8((__i16x8)a, (__i16x8)b); } -// u16x8 wasm_sub_saturate(u16x8 a, u16x8 b) -static __inline__ u16x8 __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(u16x8 a, u16x8 b) { - return (u16x8)__builtin_wasm_sub_saturate_u_i16x8((__i16x8)a, (__i16x8)b); +// v128_t wasm_sub_saturate(v128_t a, v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_sub_saturate_u_i16x8((__i16x8)a, (__i16x8)b); } -// i16x8 wasm_i16x8_mul(i16x8 a i16x8 b) -static __inline__ i16x8 __DEFAULT_FN_ATTRS wasm_i16x8_mul(i16x8 a, 
i16x8 b) { - return a * b; +// v128_t wasm_i16x8_mul(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t a, v128_t b) { + return (v128_t)((__i16x8)a * (__i16x8)b); } -// i32x4 wasm_i32x4_neg(i32x4 a) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_neg(i32x4 a) { - return -a; +// v128_t wasm_i32x4_neg(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_neg(v128_t a) { + return (v128_t)(-(__i32x4)a); } -// bool wasm_i32x4_any_true(i32x4 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_any_true(i32x4 a) { +// bool wasm_i32x4_any_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_any_true(v128_t a) { return __builtin_wasm_any_true_i32x4((__i32x4)a); } -// bool wasm_i32x4_all_true(i32x4 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(i32x4 a) { +// bool wasm_i32x4_all_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t a) { return __builtin_wasm_all_true_i32x4((__i32x4)a); } -// i32x4 wasm_i32x4_shl(i32x4 a, int32_t b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shl(i32x4 a, int32_t b) { - return a << b; +// v128_t wasm_i32x4_shl(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t a, int32_t b) { + return (v128_t)((__i32x4)a << b); } -// i32x4 wasm_i32x4_shr(i32x4 a, int32_t b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_shr(i32x4 a, int32_t b) { - return a >> b; +// v128_t wasm_i32x4_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t a, int32_t b) { + return (v128_t)((__i32x4)a >> b); } -// u32x4 wasm_u32x4_shr(u32x4 a, int32_t b) -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_u32x4_shr(u32x4 a, int32_t b) { - return a >> b; +// v128_t wasm_u32x4_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t a, int32_t b) { + return (v128_t)((__u32x4)a >> b); } -// i32x4 wasm_i32x4_add(i32x4 a i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_add(i32x4 a, i32x4 b) { - return a + b; +// v128_t wasm_i32x4_add(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t a, v128_t b) { + return (v128_t)((__u32x4)a + (__u32x4)b); } -// i32x4 wasm_i32x4_sub(i32x4 a i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_sub(i32x4 a, i32x4 b) { - return a - b; +// v128_t wasm_i32x4_sub(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_sub(v128_t a, v128_t b) { + return (v128_t)((__u32x4)a - (__u32x4)b); } -// i32x4 wasm_i32x4_mul(i32x4 a i32x4 b) -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_i32x4_mul(i32x4 a, i32x4 b) { - return a * b; +// v128_t wasm_i32x4_mul(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t a, v128_t b) { + return (v128_t)((__i32x4)a * (__i32x4)b); } #ifdef __wasm_unimplemented_simd128__ -// i64x2 wasm_i64x2_neg(i64x2 a) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_neg(i64x2 a) { - return -a; +// v128_t wasm_i64x2_neg(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t a) { + return (v128_t)(-(__i64x2)a); } -// bool wasm_i64x2_any_true(i64x2 a) -static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_any_true(i64x2 a) { +// bool wasm_i64x2_any_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_any_true(v128_t a) { return __builtin_wasm_any_true_i64x2((__i64x2)a); } -// bool wasm_i64x2_all_true(i64x2 a) -static __inline__ bool __DEFAULT_FN_ATTRS 
wasm_i64x2_all_true(i64x2 a) { +// bool wasm_i64x2_all_true(v128_t a) +static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t a) { return __builtin_wasm_all_true_i64x2((__i64x2)a); } -// i64x2 wasm_i64x2_shl(i64x2 a, int32_t b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shl(i64x2 a, int32_t b) { - return a << b; +// v128_t wasm_i64x2_shl(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t a, int32_t b) { + return (v128_t)((__i64x2)a << b); } -// i64x2 wasm_i64x2_shr(i64x2 a, int32_t b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_shr(i64x2 a, int32_t b) { - return a >> b; +// v128_t wasm_i64x2_shr(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t a, int32_t b) { + return (v128_t)((__i64x2)a >> b); } -// u64x2 wasm_u64x2_shr_u(u64x2 a, int32_t b) -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_u64x2_shr(u64x2 a, int32_t b) { - return a >> b; +// v128_t wasm_u64x2_shr_u(v128_t a, int32_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t a, int32_t b) { + return (v128_t)((__u64x2)a >> b); } -// i8x16 wasm_i64x2_add(i8x16 a i8x16 b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_add(i64x2 a, i64x2 b) { - return a + b; +// v128_t wasm_i64x2_add(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t a, v128_t b) { + return (v128_t)((__u64x2)a + (__u64x2)b); } -// i64x2 wasm_i64x2_sub(i64x2 a i64x2 b) -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_i64x2_sub(i64x2 a, i64x2 b) { - return a - b; +// v128_t wasm_i64x2_sub(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_sub(v128_t a, v128_t b) { + return (v128_t)((__u64x2)a - (__u64x2)b); } #endif // __wasm_unimplemented_simd128__ -// f32x4 wasm_f32x4_abs(f32x4 a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_abs(f32x4 a) { - return __builtin_wasm_abs_f32x4(a); +// v128_t wasm_f32x4_abs(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_abs(v128_t a) { + return (v128_t)__builtin_wasm_abs_f32x4((__f32x4)a); } -// f32x4 wasm_f32x4_neg(f32x4 a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_neg(f32x4 a) { - return -a; +// v128_t wasm_f32x4_neg(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_neg(v128_t a) { + return (v128_t)(-(__f32x4)a); } -// f32x4 wasm_f32x4_sqrt(f32x4 a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(f32x4 a) { - return __builtin_wasm_sqrt_f32x4(a); +// v128_t wasm_f32x4_sqrt(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(v128_t a) { + return (v128_t)__builtin_wasm_sqrt_f32x4((__f32x4)a); } -// f32x4 wasm_f32x4_add(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_add(f32x4 a, f32x4 b) { - return a + b; +// v128_t wasm_f32x4_add(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_add(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a + (__f32x4)b); } -// f32x4 wasm_f32x4_sub(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_sub(f32x4 a, f32x4 b) { - return a - b; +// v128_t wasm_f32x4_sub(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sub(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a - (__f32x4)b); } -// f32x4 wasm_f32x4_mul(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_mul(f32x4 a, f32x4 b) { - return a * b; +// v128_t wasm_f32x4_mul(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_mul(v128_t a, v128_t 
b) { + return (v128_t)((__f32x4)a * (__f32x4)b); } -// f32x4 wasm_f32x4_div(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_div(f32x4 a, f32x4 b) { - return a / b; +// v128_t wasm_f32x4_div(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_div(v128_t a, v128_t b) { + return (v128_t)((__f32x4)a / (__f32x4)b); } -// f32x4 wasm_f32x4_min(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_min(f32x4 a, f32x4 b) { - return __builtin_wasm_min_f32x4(a, b); +// v128_t wasm_f32x4_min(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_min(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_min_f32x4((__f32x4)a, (__f32x4)b); } -// f32x4 wasm_f32x4_max(f32x4 a f32x4 b) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_f32x4_max(f32x4 a, f32x4 b) { - return __builtin_wasm_max_f32x4(a, b); +// v128_t wasm_f32x4_max(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_max_f32x4((__f32x4)a, (__f32x4)b); } #ifdef __wasm_unimplemented_simd128__ -// f64x2 wasm_f64x2_abs(f64x2 a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_abs(f64x2 a) { - return __builtin_wasm_abs_f64x2(a); +// v128_t wasm_f64x2_abs(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t a) { + return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)a); } -// f64x2 wasm_f64x2_neg(f64x2 a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_neg(f64x2 a) { - return -a; +// v128_t wasm_f64x2_neg(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_neg(v128_t a) { + return (v128_t)(-(__f64x2)a); } -// f64x2 wasm_f64x2_sqrt(f64x2 a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(f64x2 a) { - return __builtin_wasm_sqrt_f64x2(a); +// v128_t wasm_f64x2_sqrt(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(v128_t a) { + return (v128_t)__builtin_wasm_sqrt_f64x2((__f64x2)a); } -// f64x2 wasm_f64x2_add(f64x2 a f64x2 b) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_add(f64x2 a, f64x2 b) { - return a + b; +// v128_t wasm_f64x2_add(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_add(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a + (__f64x2)b); } -// f64x2 wasm_f64x2_sub(f64x2 a f64x2 b) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_sub(f64x2 a, f64x2 b) { - return a - b; +// v128_t wasm_f64x2_sub(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sub(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a - (__f64x2)b); } -// f64x2 wasm_f64x2_mul(f64x2 a f64x2 b) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_mul(f64x2 a, f64x2 b) { - return a * b; +// v128_t wasm_f64x2_mul(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_mul(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a * (__f64x2)b); } -// f64x2 wasm_f64x2_div(f64x2 a f64x2 b) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_div(f64x2 a, f64x2 b) { - return a / b; +// v128_t wasm_f64x2_div(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_div(v128_t a, v128_t b) { + return (v128_t)((__f64x2)a / (__f64x2)b); } -// f64x2 wasm_f64x2_min(f64x2 a f64x2 b) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_min(f64x2 a, f64x2 b) { - return __builtin_wasm_min_f64x2(a, b); +// v128_t wasm_f64x2_min(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_min(v128_t a, v128_t b) { + return 
(v128_t)__builtin_wasm_min_f64x2((__f64x2)a, (__f64x2)b); } -// f64x2 wasm_f64x2_max(f64x2 a f64x2 b) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_f64x2_max(f64x2 a, f64x2 b) { - return __builtin_wasm_max_f64x2(a, b); +// v128_t wasm_f64x2_max(v128_t a v128_t b) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t a, v128_t b) { + return (v128_t)__builtin_wasm_max_f64x2((__f64x2)a, (__f64x2)b); } #endif // __wasm_unimplemented_simd128__ -static __inline__ i32x4 __DEFAULT_FN_ATTRS wasm_trunc_saturate_i32x4_f32x4(f32x4 a) { - return (i32x4)__builtin_wasm_trunc_saturate_s_i32x4_f32x4(a); +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_trunc_saturate_i32x4_f32x4(v128_t a) { + return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)a); } -static __inline__ u32x4 __DEFAULT_FN_ATTRS wasm_trunc_saturate_u32x4_f32x4(f32x4 a) { - return (u32x4)__builtin_wasm_trunc_saturate_u_i32x4_f32x4(a); +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_trunc_saturate_u32x4_f32x4(v128_t a) { + return (v128_t)__builtin_wasm_trunc_saturate_u_i32x4_f32x4((__f32x4)a); } #ifdef __wasm_unimplemented_simd128__ -static __inline__ i64x2 __DEFAULT_FN_ATTRS wasm_trunc_saturate_i64x2_f64x2(f64x2 a) { - return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(a); +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_trunc_saturate_i64x2_f64x2(v128_t a) { + return (v128_t)__builtin_wasm_trunc_saturate_s_i64x2_f64x2((__f64x2)a); } -static __inline__ u64x2 __DEFAULT_FN_ATTRS wasm_trunc_saturate_u64x2_f64x2(f64x2 a) { - return (u64x2)__builtin_wasm_trunc_saturate_s_i64x2_f64x2(a); +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_trunc_saturate_u64x2_f64x2(v128_t a) { + return (v128_t)__builtin_wasm_trunc_saturate_s_i64x2_f64x2((__f64x2)a); } #endif // __wasm_unimplemented_simd128__ -// f32x4 wasm_convert_f32x4_i32x4(i32x4 a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_convert_f32x4_i32x4(i32x4 v) { - return __builtin_convertvector(v, f32x4); +// v128_t wasm_convert_f32x4_i32x4(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_convert_f32x4_i32x4(v128_t v) { + return (v128_t) __builtin_convertvector((__i32x4)v, __f32x4); } -// f32x4 wasm_convert_f32x4_u32x4(u32x4 a) -static __inline__ f32x4 __DEFAULT_FN_ATTRS wasm_convert_f32x4_u32x4(u32x4 v) { - return __builtin_convertvector(v, f32x4); +// v128_t wasm_convert_f32x4_u32x4(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_convert_f32x4_u32x4(v128_t v) { + return (v128_t) __builtin_convertvector((__u32x4)v, __f32x4); } #ifdef __wasm_unimplemented_simd128__ -// f64x2 wasm_convert_f64x2_i64x2(i64x2 a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_convert_f64x2_i64x2(i64x2 v) { - return __builtin_convertvector(v, f64x2); +// v128_t wasm_convert_f64x2_i64x2(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_convert_f64x2_i64x2(v128_t v) { + return (v128_t) __builtin_convertvector((__i64x2)v, __f64x2); } -// f64x2 wasm_convert_f64x2_u64x2(u64x2 a) -static __inline__ f64x2 __DEFAULT_FN_ATTRS wasm_convert_f64x2_u64x2(u64x2 v) { - return __builtin_convertvector(v, f64x2); +// v128_t wasm_convert_f64x2_u64x2(v128_t a) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_convert_f64x2_u64x2(v128_t v) { + return (v128_t) __builtin_convertvector((__u64x2)v, __f64x2); } #endif // __wasm_unimplemented_simd128__ -#define wasm_v8x16_shuffle(a, b, c0, c1, c2, c3, c4, c5, c6, c7, \ - c8, c9, c10, c11, c12, c13, c14, c15) \ - ((v128)(__builtin_shufflevector((v128)(a), (v128)(b), \ - c0, c1, c2, c3, c4, c5, c6, c7, \ - c8, c9, c10, c11, c12, 
c13, c14, c15))) +#define wasm_v8x16_shuffle( \ + a, b, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15) \ + ((v128_t)(__builtin_shufflevector((__u8x16)(a), (__u8x16)(b), c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15))) diff --git a/tests/test_core.py b/tests/test_core.py index 7bf3d4e08cb7f..167be9115b014 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5527,8 +5527,8 @@ def test_wasm_builtin_simd(self, js_engines): self.do_run(open(path_from_root('tests', 'test_wasm_builtin_simd.c')).read(), 'Success!', js_engines=js_engines) self.emcc_args.append('-munimplemented-simd128') - self.do_run(open(path_from_root('tests', 'test_wasm_builtin_simd.c')).read(), 'Success!', - js_engines=[]) + self.build(open(path_from_root('tests', 'test_wasm_builtin_simd.c')).read(), + self.get_dir(), os.path.join(self.get_dir(), 'src.cpp')) @wasm_simd def test_wasm_intrinsics_simd(self, js_engines): @@ -5536,8 +5536,8 @@ def test_wasm_intrinsics_simd(self, js_engines): self.do_run(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), 'Success!', js_engines=js_engines) self.emcc_args.append('-munimplemented-simd128') - self.do_run(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), 'Success!', - js_engines=[]) + self.build(open(path_from_root('tests', 'test_wasm_intrinsics_simd.c')).read(), + self.get_dir(), os.path.join(self.get_dir(), 'src.cpp')) @asm_simd def test_simd(self): @@ -6580,7 +6580,7 @@ class std_string { std::cout << txtTestString.data() << std::endl; return 0; } - ''', '''std_string(const char* s) + ''', '''std_string(const char* s) someweirdtext 212121 212121 diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index a5635c005a21c..071d57d12bab0 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -6,534 +6,534 @@ #define TESTFN EMSCRIPTEN_KEEPALIVE __attribute__((noinline)) -i8x16 TESTFN i8x16_load(i8x16 *ptr) { - return (i8x16) wasm_v128_load(ptr); +v128_t TESTFN i8x16_load(void *ptr) { + return wasm_v128_load(ptr); } -void TESTFN i8x16_store(i8x16 *ptr, i8x16 vec) { +void TESTFN i8x16_store(void *ptr, v128_t vec) { wasm_v128_store(ptr, vec); } -v128 TESTFN i32x4_const(void) { +v128_t TESTFN i32x4_const(void) { return wasm_v128_const(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16); } -v128 TESTFN i8x16_shuffle_interleave_bytes(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_shuffle_interleave_bytes(v128_t x, v128_t y) { return wasm_v8x16_shuffle(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); } -v128 TESTFN i32x4_shuffle_reverse(i32x4 vec) { +v128_t TESTFN i32x4_shuffle_reverse(v128_t vec) { return wasm_v8x16_shuffle(vec, vec, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); } -i8x16 TESTFN i8x16_splat(int32_t x) { +v128_t TESTFN i8x16_splat(int32_t x) { return wasm_i8x16_splat(x); } -int32_t TESTFN i8x16_extract_lane_s_first(i8x16 vec) { +int32_t TESTFN i8x16_extract_lane_s_first(v128_t vec) { return wasm_i8x16_extract_lane(vec, 0); } -int32_t TESTFN i8x16_extract_lane_s_last(i8x16 vec) { +int32_t TESTFN i8x16_extract_lane_s_last(v128_t vec) { return wasm_i8x16_extract_lane(vec, 15); } #ifdef __wasm_unimplemented_simd128__ -uint32_t TESTFN i8x16_extract_lane_u_first(i8x16 vec) { +uint32_t TESTFN i8x16_extract_lane_u_first(v128_t vec) { return wasm_u8x16_extract_lane(vec, 0); } -uint32_t TESTFN i8x16_extract_lane_u_last(i8x16 vec) { +uint32_t TESTFN i8x16_extract_lane_u_last(v128_t vec) { return wasm_u8x16_extract_lane(vec, 15); 
} #endif // __wasm_unimplemented_simd128__ -i8x16 TESTFN i8x16_replace_lane_first(i8x16 vec, int32_t val) { +v128_t TESTFN i8x16_replace_lane_first(v128_t vec, int32_t val) { return wasm_i8x16_replace_lane(vec, 0, val); } -i8x16 TESTFN i8x16_replace_lane_last(i8x16 vec, int32_t val) { +v128_t TESTFN i8x16_replace_lane_last(v128_t vec, int32_t val) { return wasm_i8x16_replace_lane(vec, 15, val); } -i16x8 TESTFN i16x8_splat(int32_t x) { +v128_t TESTFN i16x8_splat(int32_t x) { return wasm_i16x8_splat(x); } -int32_t TESTFN i16x8_extract_lane_s_first(i16x8 vec) { +int32_t TESTFN i16x8_extract_lane_s_first(v128_t vec) { return wasm_i16x8_extract_lane(vec, 0); } -int32_t TESTFN i16x8_extract_lane_s_last(i16x8 vec) { +int32_t TESTFN i16x8_extract_lane_s_last(v128_t vec) { return wasm_i16x8_extract_lane(vec, 7); } #ifdef __wasm_unimplemented_simd128__ -int32_t TESTFN i16x8_extract_lane_u_first(i16x8 vec) { +int32_t TESTFN i16x8_extract_lane_u_first(v128_t vec) { return wasm_u16x8_extract_lane(vec, 0); } -int32_t TESTFN i16x8_extract_lane_u_last(i16x8 vec) { +int32_t TESTFN i16x8_extract_lane_u_last(v128_t vec) { return wasm_u16x8_extract_lane(vec, 7); } #endif // __wasm_unimplemented_simd128__ -i16x8 TESTFN i16x8_replace_lane_first(i16x8 vec, int32_t val) { +v128_t TESTFN i16x8_replace_lane_first(v128_t vec, int32_t val) { return wasm_i16x8_replace_lane(vec, 0, val); } -i16x8 TESTFN i16x8_replace_lane_last(i16x8 vec, int32_t val) { +v128_t TESTFN i16x8_replace_lane_last(v128_t vec, int32_t val) { return wasm_i16x8_replace_lane(vec, 7, val); } -i32x4 TESTFN i32x4_splat(int32_t x) { +v128_t TESTFN i32x4_splat(int32_t x) { return wasm_i32x4_splat(x); } -int32_t TESTFN i32x4_extract_lane_first(i32x4 vec) { +int32_t TESTFN i32x4_extract_lane_first(v128_t vec) { return wasm_i32x4_extract_lane(vec, 0); } -int32_t TESTFN i32x4_extract_lane_last(i32x4 vec) { +int32_t TESTFN i32x4_extract_lane_last(v128_t vec) { return wasm_i32x4_extract_lane(vec, 3); } -i32x4 TESTFN i32x4_replace_lane_first(i32x4 vec, int32_t val) { +v128_t TESTFN i32x4_replace_lane_first(v128_t vec, int32_t val) { return wasm_i32x4_replace_lane(vec, 0, val); } -i32x4 TESTFN i32x4_replace_lane_last(i32x4 vec, int32_t val) { +v128_t TESTFN i32x4_replace_lane_last(v128_t vec, int32_t val) { return wasm_i32x4_replace_lane(vec, 3, val); } #ifdef __wasm_unimplemented_simd128__ -i64x2 TESTFN i64x2_splat(int64_t x) { +v128_t TESTFN i64x2_splat(int64_t x) { return wasm_i64x2_splat(x); } -int64_t TESTFN i64x2_extract_lane_first(i64x2 vec) { +int64_t TESTFN i64x2_extract_lane_first(v128_t vec) { return wasm_i64x2_extract_lane(vec, 0); } -int64_t TESTFN i64x2_extract_lane_last(i64x2 vec) { +int64_t TESTFN i64x2_extract_lane_last(v128_t vec) { return wasm_i64x2_extract_lane(vec, 1); } -i64x2 TESTFN i64x2_replace_lane_first(i64x2 vec, int64_t val) { +v128_t TESTFN i64x2_replace_lane_first(v128_t vec, int64_t val) { return wasm_i64x2_replace_lane(vec, 0, val); } -i64x2 TESTFN i64x2_replace_lane_last(i64x2 vec, int64_t val) { +v128_t TESTFN i64x2_replace_lane_last(v128_t vec, int64_t val) { return wasm_i64x2_replace_lane(vec, 1, val); } #endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_splat(float x) { +v128_t TESTFN f32x4_splat(float x) { return wasm_f32x4_splat(x); } -float TESTFN f32x4_extract_lane_first(f32x4 vec) { +float TESTFN f32x4_extract_lane_first(v128_t vec) { return wasm_f32x4_extract_lane(vec, 0); } -float TESTFN f32x4_extract_lane_last(f32x4 vec) { +float TESTFN f32x4_extract_lane_last(v128_t vec) { return 
wasm_f32x4_extract_lane(vec, 3); } -f32x4 TESTFN f32x4_replace_lane_first(f32x4 vec, float val) { +v128_t TESTFN f32x4_replace_lane_first(v128_t vec, float val) { return wasm_f32x4_replace_lane(vec, 0, val); } -f32x4 TESTFN f32x4_replace_lane_last(f32x4 vec, float val) { +v128_t TESTFN f32x4_replace_lane_last(v128_t vec, float val) { return wasm_f32x4_replace_lane(vec, 3, val); } #ifdef __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_splat(int64_t x) { +v128_t TESTFN f64x2_splat(int64_t x) { return wasm_f64x2_splat((double ) x); } -double TESTFN f64x2_extract_lane_first(f64x2 vec) { +double TESTFN f64x2_extract_lane_first(v128_t vec) { return wasm_f64x2_extract_lane(vec, 0); } -double TESTFN f64x2_extract_lane_last(f64x2 vec) { +double TESTFN f64x2_extract_lane_last(v128_t vec) { return wasm_f64x2_extract_lane(vec, 1); } -f64x2 TESTFN f64x2_replace_lane_first(f64x2 vec, double val) { +v128_t TESTFN f64x2_replace_lane_first(v128_t vec, double val) { return wasm_f64x2_replace_lane(vec, 0, val); } -f64x2 TESTFN f64x2_replace_lane_last(f64x2 vec, double val) { +v128_t TESTFN f64x2_replace_lane_last(v128_t vec, double val) { return wasm_f64x2_replace_lane(vec, 1, val); } #endif // __wasm_unimplemented_simd128__ -u8x16 TESTFN i8x16_eq(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_eq(v128_t x, v128_t y) { return wasm_i8x16_eq(x, y); } -u8x16 TESTFN i8x16_ne(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_ne(v128_t x, v128_t y) { return wasm_i8x16_ne(x, y); } -u8x16 TESTFN i8x16_lt_s(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_lt_s(v128_t x, v128_t y) { return wasm_i8x16_lt(x, y); } -u8x16 TESTFN i8x16_lt_u(u8x16 x, u8x16 y) { +v128_t TESTFN i8x16_lt_u(v128_t x, v128_t y) { return wasm_u8x16_lt(x, y); } -u8x16 TESTFN i8x16_gt_s(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_gt_s(v128_t x, v128_t y) { return wasm_i8x16_gt(x, y); } -u8x16 TESTFN i8x16_gt_u(u8x16 x, u8x16 y) { +v128_t TESTFN i8x16_gt_u(v128_t x, v128_t y) { return wasm_u8x16_gt(x,y); } -u8x16 TESTFN i8x16_le_s(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_le_s(v128_t x, v128_t y) { return wasm_i8x16_le(x,y); } -u8x16 TESTFN u8x16_le_u(u8x16 x, u8x16 y) { +v128_t TESTFN u8x16_le_u(v128_t x, v128_t y) { return wasm_u8x16_le(x, y); } -u8x16 TESTFN i8x16_ge_s(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_ge_s(v128_t x, v128_t y) { return wasm_i8x16_ge(x, y); } -u8x16 TESTFN i8x16_ge_u(u8x16 x, u8x16 y) { +v128_t TESTFN i8x16_ge_u(v128_t x, v128_t y) { return wasm_u8x16_ge(x, y); } -u16x8 TESTFN i16x8_eq(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_eq(v128_t x, v128_t y) { return wasm_i16x8_eq(x,y); } -u16x8 TESTFN i16x8_ne(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_ne(v128_t x, v128_t y) { return wasm_i16x8_ne(x,y); } -u16x8 TESTFN i16x8_lt_s(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_lt_s(v128_t x, v128_t y) { return wasm_i16x8_lt(x,y); } -u16x8 TESTFN i16x8_lt_u(u16x8 x, u16x8 y) { +v128_t TESTFN i16x8_lt_u(v128_t x, v128_t y) { return wasm_u16x8_lt(x,y); } -u16x8 TESTFN i16x8_gt_s(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_gt_s(v128_t x, v128_t y) { return wasm_i16x8_gt(x,y); } -u16x8 TESTFN i16x8_gt_u(u16x8 x, u16x8 y) { +v128_t TESTFN i16x8_gt_u(v128_t x, v128_t y) { return wasm_u16x8_gt(x,y); } -u16x8 TESTFN i16x8_le_s(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_le_s(v128_t x, v128_t y) { return wasm_i16x8_le(x, y); } -u16x8 TESTFN i16x8_le_u(u16x8 x, u16x8 y) { +v128_t TESTFN i16x8_le_u(v128_t x, v128_t y) { return wasm_u16x8_le(x, y); } -u16x8 TESTFN i16x8_ge_s(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_ge_s(v128_t x, v128_t y) { return wasm_i16x8_ge(x, y); } -u16x8 
TESTFN i16x8_ge_u(u16x8 x, u16x8 y) { +v128_t TESTFN i16x8_ge_u(v128_t x, v128_t y) { return wasm_u16x8_ge(x, y); } -u32x4 TESTFN i32x4_eq(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_eq(v128_t x, v128_t y) { return wasm_i32x4_eq(x, y); } -u32x4 TESTFN i32x4_ne(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_ne(v128_t x, v128_t y) { return wasm_i32x4_ne(x, y); } -u32x4 TESTFN i32x4_lt_s(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_lt_s(v128_t x, v128_t y) { return wasm_i32x4_lt(x, y); } -u32x4 TESTFN i32x4_lt_u(u32x4 x, u32x4 y) { +v128_t TESTFN i32x4_lt_u(v128_t x, v128_t y) { return wasm_u32x4_lt(x, y); } -u32x4 TESTFN i32x4_gt_s(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_gt_s(v128_t x, v128_t y) { return wasm_i32x4_gt(x, y); } -u32x4 TESTFN i32x4_gt_u(u32x4 x, u32x4 y) { +v128_t TESTFN i32x4_gt_u(v128_t x, v128_t y) { return wasm_u32x4_gt(x, y); } -u32x4 TESTFN i32x4_le_s(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_le_s(v128_t x, v128_t y) { return wasm_i32x4_le(x, y); } -u32x4 TESTFN i32x4_le_u(u32x4 x, u32x4 y) { +v128_t TESTFN i32x4_le_u(v128_t x, v128_t y) { return wasm_u32x4_le(x, y); } -u32x4 TESTFN i32x4_ge_s(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_ge_s(v128_t x, v128_t y) { return wasm_i32x4_ge(x, y); } -u32x4 TESTFN i32x4_ge_u(u32x4 x, u32x4 y) { +v128_t TESTFN i32x4_ge_u(v128_t x, v128_t y) { return wasm_u32x4_ge(x, y); } -u32x4 TESTFN f32x4_eq(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_eq(v128_t x, v128_t y) { return wasm_f32x4_eq(x,y); } -u32x4 TESTFN f32x4_ne(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_ne(v128_t x, v128_t y) { return wasm_f32x4_ne(x, y); } -u32x4 TESTFN f32x4_lt(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_lt(v128_t x, v128_t y) { return wasm_f32x4_lt(x, y); } -u32x4 TESTFN f32x4_gt(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_gt(v128_t x, v128_t y) { return wasm_f32x4_gt(x,y); } -u32x4 TESTFN f32x4_le(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_le(v128_t x, v128_t y) { return wasm_f32x4_le(x, y); } -u32x4 TESTFN f32x4_ge(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_ge(v128_t x, v128_t y) { return wasm_f32x4_ge(x, y); } #ifdef __wasm_undefined_simd128__ -u64x2 TESTFN f64x2_eq(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_eq(v128_t x, v128_t y) { return wasm_f64x2_eq(x,y); } -u64x2 TESTFN f64x2_ne(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_ne(v128_t x, v128_t y) { return wasm_f64x2_ne(x,y); } -u64x2 TESTFN f64x2_lt(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_lt(v128_t x, v128_t y) { return wasm_f64x2_lt(x,y); } -u64x2 TESTFN f64x2_gt(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_gt(v128_t x, v128_t y) { return wasm_f64x2_gt(x, y); } -u64x2 TESTFN f64x2_le(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_le(v128_t x, v128_t y) { return wasm_f64x2_le(x, y); } -u64x2 TESTFN f64x2_ge(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_ge(v128_t x, v128_t y) { return wasm_f64x2_ge(x, y); } #endif // __wasm_undefined_simd128__ -v128 TESTFN v128_not(v128 vec) { +v128_t TESTFN v128_not(v128_t vec) { return wasm_v128_not(vec); } -v128 TESTFN v128_and(v128 x, v128 y) { +v128_t TESTFN v128_and(v128_t x, v128_t y) { return wasm_v128_and(x, y); } -v128 TESTFN v128_or(v128 x, v128 y) { +v128_t TESTFN v128_or(v128_t x, v128_t y) { return wasm_v128_or(x,y); } -v128 TESTFN v128_xor(v128 x, v128 y) { +v128_t TESTFN v128_xor(v128_t x, v128_t y) { return wasm_v128_xor(x,y); } -v128 TESTFN v128_bitselect(v128 x, v128 y, v128 cond) { +v128_t TESTFN v128_bitselect(v128_t x, v128_t y, v128_t cond) { return wasm_v128_bitselect(x, y, cond); } -i8x16 TESTFN i8x16_neg(i8x16 vec) { +v128_t TESTFN i8x16_neg(v128_t vec) { return wasm_i8x16_neg(vec); } -int32_t 
TESTFN i8x16_any_true(i8x16 vec) { +int32_t TESTFN i8x16_any_true(v128_t vec) { return wasm_i8x16_any_true(vec); } -int32_t TESTFN i8x16_all_true(i8x16 vec) { +int32_t TESTFN i8x16_all_true(v128_t vec) { return wasm_i8x16_all_true(vec); } -i8x16 TESTFN i8x16_shl(i8x16 vec, int32_t shift) { +v128_t TESTFN i8x16_shl(v128_t vec, int32_t shift) { return wasm_i8x16_shl(vec, shift); } -i8x16 TESTFN i8x16_shr_s(i8x16 vec, int32_t shift) { +v128_t TESTFN i8x16_shr_s(v128_t vec, int32_t shift) { return wasm_i8x16_shr(vec, shift); } -u8x16 TESTFN i8x16_shr_u(u8x16 vec, int32_t shift) { +v128_t TESTFN i8x16_shr_u(v128_t vec, int32_t shift) { return wasm_u8x16_shr(vec, shift); } -i8x16 TESTFN i8x16_add(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_add(v128_t x, v128_t y) { return wasm_i8x16_add(x,y); } -i8x16 TESTFN i8x16_add_saturate_s(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_add_saturate_s(v128_t x, v128_t y) { return wasm_i8x16_add_saturate(x, y); } -u8x16 TESTFN i8x16_add_saturate_u(u8x16 x, u8x16 y) { +v128_t TESTFN i8x16_add_saturate_u(v128_t x, v128_t y) { return wasm_u8x16_add_saturate(x, y); } -i8x16 TESTFN i8x16_sub(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_sub(v128_t x, v128_t y) { return wasm_i8x16_sub(x,y); } -i8x16 TESTFN i8x16_sub_saturate_s(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_sub_saturate_s(v128_t x, v128_t y) { return wasm_i8x16_sub_saturate(x, y); } -u8x16 TESTFN i8x16_sub_saturate_u(u8x16 x, u8x16 y) { +v128_t TESTFN i8x16_sub_saturate_u(v128_t x, v128_t y) { return wasm_u8x16_sub_saturate(x, y); } -i8x16 TESTFN i8x16_mul(i8x16 x, i8x16 y) { +v128_t TESTFN i8x16_mul(v128_t x, v128_t y) { return wasm_i8x16_mul(x, y); } -i16x8 TESTFN i16x8_neg(i16x8 vec) { +v128_t TESTFN i16x8_neg(v128_t vec) { return wasm_i16x8_neg(vec); } -bool TESTFN i16x8_any_true(i16x8 vec) { +bool TESTFN i16x8_any_true(v128_t vec) { return wasm_i16x8_any_true(vec); } -int32_t TESTFN i16x8_all_true(i16x8 vec) { +int32_t TESTFN i16x8_all_true(v128_t vec) { return wasm_i16x8_all_true(vec); } -i16x8 TESTFN i16x8_shl(i16x8 vec, int32_t shift) { +v128_t TESTFN i16x8_shl(v128_t vec, int32_t shift) { return wasm_i16x8_shl(vec, shift); } -i16x8 TESTFN i16x8_shr_s(i16x8 vec, int32_t shift) { +v128_t TESTFN i16x8_shr_s(v128_t vec, int32_t shift) { return wasm_i16x8_shr(vec, shift); } -u16x8 TESTFN i16x8_shr_u(u16x8 vec, int32_t shift) { +v128_t TESTFN i16x8_shr_u(v128_t vec, int32_t shift) { return wasm_u16x8_shr(vec, shift); } -i16x8 TESTFN i16x8_add(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_add(v128_t x, v128_t y) { return wasm_i16x8_add(x, y); } -i16x8 TESTFN i16x8_add_saturate_s(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_add_saturate_s(v128_t x, v128_t y) { return wasm_i16x8_add_saturate(x, y); } -u16x8 TESTFN i16x8_add_saturate_u(u16x8 x, u16x8 y) { +v128_t TESTFN i16x8_add_saturate_u(v128_t x, v128_t y) { return wasm_u16x8_add_saturate(x, y); } -i16x8 TESTFN i16x8_sub(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_sub(v128_t x, v128_t y) { return wasm_i16x8_sub(x, y); } -i16x8 TESTFN i16x8_sub_saturate_s(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_sub_saturate_s(v128_t x, v128_t y) { return wasm_i16x8_sub_saturate(x,y); } -u16x8 TESTFN i16x8_sub_saturate_u(u16x8 x, u16x8 y) { +v128_t TESTFN i16x8_sub_saturate_u(v128_t x, v128_t y) { return wasm_u16x8_sub_saturate(x, y); } -i16x8 TESTFN i16x8_mul(i16x8 x, i16x8 y) { +v128_t TESTFN i16x8_mul(v128_t x, v128_t y) { return wasm_i16x8_mul(x, y); } -i32x4 TESTFN i32x4_neg(i32x4 vec) { +v128_t TESTFN i32x4_neg(v128_t vec) { return wasm_i32x4_neg(vec); } -int32_t TESTFN 
i32x4_any_true(i32x4 vec) { +int32_t TESTFN i32x4_any_true(v128_t vec) { return wasm_i32x4_any_true(vec); } -int32_t TESTFN i32x4_all_true(i32x4 vec) { +int32_t TESTFN i32x4_all_true(v128_t vec) { return wasm_i32x4_all_true(vec); } -i32x4 TESTFN i32x4_shl(i32x4 vec, int32_t shift) { +v128_t TESTFN i32x4_shl(v128_t vec, int32_t shift) { return wasm_i32x4_shl(vec, shift); } -i32x4 TESTFN i32x4_shr_s(i32x4 vec, int32_t shift) { +v128_t TESTFN i32x4_shr_s(v128_t vec, int32_t shift) { return wasm_i32x4_shr(vec, shift); } -u32x4 TESTFN i32x4_shr_u(u32x4 vec, int32_t shift) { +v128_t TESTFN i32x4_shr_u(v128_t vec, int32_t shift) { return wasm_u32x4_shr(vec, shift); } -i32x4 TESTFN i32x4_add(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_add(v128_t x, v128_t y) { return wasm_i32x4_add(x, y); } -i32x4 TESTFN i32x4_sub(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_sub(v128_t x, v128_t y) { return wasm_i32x4_sub(x, y); } -i32x4 TESTFN i32x4_mul(i32x4 x, i32x4 y) { +v128_t TESTFN i32x4_mul(v128_t x, v128_t y) { return wasm_i32x4_mul(x, y); } #ifdef __wasm_unimplemented_simd128__ -i64x2 TESTFN i64x2_neg(i64x2 vec) { +v128_t TESTFN i64x2_neg(v128_t vec) { return wasm_i64x2_neg(vec); } #ifdef __wasm_unimplemented_simd128__ -bool TESTFN i64x2_any_true(i64x2 vec) { +bool TESTFN i64x2_any_true(v128_t vec) { return wasm_i64x2_any_true(vec); } -bool TESTFN i64x2_all_true(i64x2 vec) { +bool TESTFN i64x2_all_true(v128_t vec) { return wasm_i64x2_all_true(vec); } #endif // __wasm_unimplemented_simd128__ -i64x2 TESTFN i64x2_shl(i64x2 vec, int32_t shift) { +v128_t TESTFN i64x2_shl(v128_t vec, int32_t shift) { return wasm_i64x2_shl(vec, shift); } -i64x2 TESTFN i64x2_shr_s(i64x2 vec, int32_t shift) { +v128_t TESTFN i64x2_shr_s(v128_t vec, int32_t shift) { return wasm_i64x2_shr(vec, shift); } -u64x2 TESTFN i64x2_shr_u(u64x2 vec, int32_t shift) { +v128_t TESTFN i64x2_shr_u(v128_t vec, int32_t shift) { return wasm_u64x2_shr(vec, shift); } -i64x2 TESTFN i64x2_add(i64x2 x, i64x2 y) { +v128_t TESTFN i64x2_add(v128_t x, v128_t y) { return wasm_i64x2_add(x, y); } -i64x2 TESTFN i64x2_sub(i64x2 x, i64x2 y) { +v128_t TESTFN i64x2_sub(v128_t x, v128_t y) { return wasm_i64x2_sub(x, y); } #endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_abs(f32x4 vec) { +v128_t TESTFN f32x4_abs(v128_t vec) { return wasm_f32x4_abs(vec); } -f32x4 TESTFN f32x4_neg(f32x4 vec) { +v128_t TESTFN f32x4_neg(v128_t vec) { return wasm_f32x4_neg(vec); } #ifdef __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_sqrt(f32x4 vec) { +v128_t TESTFN f32x4_sqrt(v128_t vec) { return wasm_f32x4_sqrt(vec); } #endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_add(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_add(v128_t x, v128_t y) { return wasm_f32x4_add(x,y); } -f32x4 TESTFN f32x4_sub(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_sub(v128_t x, v128_t y) { return wasm_f32x4_sub(x, y); } -f32x4 TESTFN f32x4_mul(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_mul(v128_t x, v128_t y) { return wasm_f32x4_mul(x, y); } -f32x4 TESTFN f32x4_div(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_div(v128_t x, v128_t y) { return wasm_f32x4_div(x, y); } -f32x4 TESTFN f32x4_min(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_min(v128_t x, v128_t y) { return wasm_f32x4_min(x, y); } -f32x4 TESTFN f32x4_max(f32x4 x, f32x4 y) { +v128_t TESTFN f32x4_max(v128_t x, v128_t y) { return wasm_f32x4_max(x, y); } #ifdef __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_abs(f64x2 vec) { +v128_t TESTFN f64x2_abs(v128_t vec) { return wasm_f64x2_abs(vec); } -f64x2 TESTFN f64x2_neg(f64x2 vec) { +v128_t TESTFN 
f64x2_neg(v128_t vec) { return -vec; } -f64x2 TESTFN f64x2_sqrt(f64x2 vec) { +v128_t TESTFN f64x2_sqrt(v128_t vec) { return wasm_f64x2_sqrt(vec); } -f64x2 TESTFN f64x2_add(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_add(v128_t x, v128_t y) { return x + y; } -f64x2 TESTFN f64x2_sub(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_sub(v128_t x, v128_t y) { return x - y; } -f64x2 TESTFN f64x2_mul(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_mul(v128_t x, v128_t y) { return x * y; } -f64x2 TESTFN f64x2_div(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_div(v128_t x, v128_t y) { return x / y; } -f64x2 TESTFN f64x2_min(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_min(v128_t x, v128_t y) { return wasm_f64x2_min(x, y); } -f64x2 TESTFN f64x2_max(f64x2 x, f64x2 y) { +v128_t TESTFN f64x2_max(v128_t x, v128_t y) { return wasm_f64x2_max(x, y); } #endif // __wasm_unimplemented_simd128__ -i32x4 TESTFN i32x4_trunc_s_f32x4_sat(f32x4 vec) { +v128_t TESTFN i32x4_trunc_s_f32x4_sat(v128_t vec) { return wasm_trunc_saturate_i32x4_f32x4(vec); } -u32x4 TESTFN i32x4_trunc_u_f32x4_sat(f32x4 vec) { +v128_t TESTFN i32x4_trunc_u_f32x4_sat(v128_t vec) { return wasm_trunc_saturate_u32x4_f32x4(vec); } #ifdef __wasm_unimplemented_simd128__ -i64x2 TESTFN i64x2_trunc_s_f64x2_sat(f64x2 vec) { +v128_t TESTFN i64x2_trunc_s_f64x2_sat(v128_t vec) { return wasm_trunc_saturate_i64x2_f64x2(vec); } -u64x2 TESTFN i64x2_trunc_u_f64x2_sat(f64x2 vec) { +v128_t TESTFN i64x2_trunc_u_f64x2_sat(v128_t vec) { return wasm_trunc_saturate_u64x2_f64x2(vec); } #endif // __wasm_unimplemented_simd128__ -f32x4 TESTFN f32x4_convert_s_i32x4(i32x4 vec) { +v128_t TESTFN f32x4_convert_s_i32x4(v128_t vec) { return wasm_convert_f32x4_i32x4(vec); } -f32x4 TESTFN f32x4_convert_u_i32x4(u32x4 vec) { +v128_t TESTFN f32x4_convert_u_i32x4(v128_t vec) { return wasm_convert_f32x4_u32x4(vec); } #ifdef __wasm_unimplemented_simd128__ -f64x2 TESTFN f64x2_convert_s_i64x2(i64x2 vec) { +v128_t TESTFN f64x2_convert_s_i64x2(v128_t vec) { return wasm_convert_f64x2_i64x2(vec); } -f64x2 TESTFN f64x2_convert_u_i64x2(u64x2 vec) { +v128_t TESTFN f64x2_convert_u_i64x2(v128_t vec) { return wasm_convert_f64x2_u64x2(vec); } @@ -541,18 +541,18 @@ f64x2 TESTFN f64x2_convert_u_i64x2(u64x2 vec) { static int failures = 0; -#define formatter(x) _Generic((x), \ - int8_t: "%d", \ - uint8_t: "%d", \ - int16_t: "%d", \ - uint16_t: "%d", \ - int32_t: "%d", \ - uint32_t: "%d", \ - int64_t: "%ld", \ - uint64_t: "%ld", \ - bool: "%d", \ - float: "%f", \ - double: "%f" \ +#define formatter(x) _Generic((x), \ + char: "%d", \ + unsigned char: "%d", \ + short: "%d", \ + unsigned short: "%d", \ + int: "%d", \ + unsigned int: "%d", \ + long long: "%lld", \ + unsigned long long: "%lld", \ + bool: "%d", \ + float: "%f", \ + double: "%f" \ ) #define err(x) fprintf(stderr, formatter(x), x) @@ -581,29 +581,10 @@ static int failures = 0; } \ }) -#ifdef __wasm_unimplemented_simd128__ -#define UNIMPLEMENTED_TYPES \ - , \ - i64x2: 2, \ - u64x2: 2, \ - f64x2: 2 -#else -#define UNIMPLEMENTED_TYPES -#endif // __wasm_unimplemented_simd128__ - #define expect_vec(_a, _b) __extension__({ \ - __typeof__(_a) a = (_a), b = (_b); \ + __typeof__(_b) a = (__typeof__(_b))(_a), b = (_b); \ bool err = false; \ - size_t lanes = _Generic((a), \ - u8x16: 16, \ - i8x16: 16, \ - i16x8: 8, \ - u16x8: 8, \ - i32x4: 4, \ - u32x4: 4, \ - f32x4: 4 \ - UNIMPLEMENTED_TYPES \ - ); \ + size_t lanes = sizeof(a) / sizeof(a[0]); \ for (size_t i = 0; i < lanes; i++) { \ if (!eq(a[i], b[i])) { \ err = true; \ @@ -628,670 +609,862 @@ static int failures = 0; } \ }) 
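/*
 * Illustrative aside, not part of the patch: a minimal standalone sketch of
 * the two helper-macro changes above -- the printf formatter selected with
 * _Generic over the standard element types, and the lane count computed as
 * sizeof(a) / sizeof(a[0]) instead of a per-type _Generic switch. The
 * my_i32x4 / my_i16x8 typedefs and the dump() macro are invented for the
 * example; it assumes Clang/GCC vector extensions.
 */
#include <stdio.h>

typedef int   my_i32x4 __attribute__((__vector_size__(16)));
typedef short my_i16x8 __attribute__((__vector_size__(16)));

#define lanes(v) (sizeof(v) / sizeof((v)[0]))  /* 4 for my_i32x4, 8 for my_i16x8 */
#define fmt(x) _Generic((x), int: "%d ", short: "%hd ", default: "%g ")

#define dump(v)                                  \
  do {                                           \
    for (size_t i = 0; i < lanes(v); i++)        \
      printf(fmt((v)[i]), (v)[i]);               \
    printf("\n");                                \
  } while (0)

int main(void) {
  my_i32x4 a = {1, 2, 3, 4};
  my_i16x8 b = {1, 2, 3, 4, 5, 6, 7, 8};
  dump(a); /* prints the 4 int lanes */
  dump(b); /* prints the 8 short lanes */
  return 0;
}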
+#define i8x16(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16) \ + (__extension__(char __attribute__((__vector_size__(16)))) \ + {c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16}) + +#define u8x16(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16) \ + (__extension__(unsigned char __attribute__((__vector_size__(16)))) \ + {c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16}) + +#define i16x8(c1, c2, c3, c4, c5, c6, c7, c8) \ + (__extension__(short __attribute__((__vector_size__(16)))) \ + {c1, c2, c3, c4, c5, c6, c7, c8}) + +#define u16x8(c1, c2, c3, c4, c5, c6, c7, c8) \ + (__extension__(unsigned short __attribute__((__vector_size__(16)))) \ + {c1, c2, c3, c4, c5, c6, c7, c8}) + +#define i32x4(c1, c2, c3, c4) \ + (__extension__(int __attribute__((__vector_size__(16)))){c1, c2, c3, c4}) + +#define u32x4(c1, c2, c3, c4) \ + (__extension__(unsigned int __attribute__((__vector_size__(16)))){c1, c2, c3, c4}) + +#define i64x2(c1, c2) \ + (__extension__(long long __attribute__((__vector_size__(16)))) {c1, c2}) + +#define u64x2(c1, c2) \ + (__extension__(unsigned long long __attribute__((__vector_size__(16)))){c1, c2}) + +#define f32x4(c1, c2, c3, c4) \ + (__extension__(float __attribute__((__vector_size__(16)))){c1, c2, c3, c4}) + +#define f64x2(c1, c2) \ + (__extension__(double __attribute__((__vector_size__(16)))) {c1, c2}) + + int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { { - i8x16 vec = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; + v128_t vec = (v128_t)u8x16(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); expect_vec(i8x16_load(&vec), - (__extension__(i8x16){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})); - i8x16_store(&vec, __extension__(i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}); + i8x16(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3)); + i8x16_store(&vec, (v128_t)i8x16(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)); expect_vec(i8x16_load(&vec), - (__extension__(i8x16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7})); + i8x16(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)); } - expect_vec(i32x4_const(), ((v128){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})); + expect_vec(i32x4_const(), u8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); expect_vec( i8x16_shuffle_interleave_bytes( - (i8x16){1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0}, - (i8x16){0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16} + (v128_t)i8x16(1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0), + (v128_t)i8x16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16) ), - ((v128)(i8x16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + i8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) ); - expect_vec(i32x4_shuffle_reverse((i32x4){1, 2, 3, 4}), ((v128)(i32x4){4, 3, 2, 1})); + expect_vec(i32x4_shuffle_reverse((v128_t)i32x4(1, 2, 3, 4)), i32x4(4, 3, 2, 1)); // i8x16 lane accesses - expect_vec(i8x16_splat(5), ((i8x16){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5})); - expect_vec(i8x16_splat(257), ((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); - expect_eq(i8x16_extract_lane_s_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), -1); - expect_eq(i8x16_extract_lane_s_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), -1); + expect_vec(i8x16_splat(5), i8x16(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5)); + expect_vec(i8x16_splat(257), i8x16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); + expect_eq( + i8x16_extract_lane_s_first( + 
(v128_t)i8x16(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + ), + -1 + ); + expect_eq( + i8x16_extract_lane_s_last( + (v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1) + ), + -1 + ); #ifdef __wasm_unimplemented_simd128__ - expect_eq(i8x16_extract_lane_u_first((i8x16){-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 255); - expect_eq(i8x16_extract_lane_u_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1}), 255); + expect_eq( + i8x16_extract_lane_u_first( + (v128_t)i8x16(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + ), + 255 + ); + expect_eq( + i8x16_extract_lane_u_last( + (v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1) + ), + 255 + ); #endif // __wasm_unimplemented_simd128__ expect_vec( - i8x16_replace_lane_first((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), - ((i8x16){7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + i8x16_replace_lane_first( + (v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + 7 + ), + i8x16(7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) ); expect_vec( - i8x16_replace_lane_last((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 7), - ((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}) + i8x16_replace_lane_last( + (v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + 7 + ), + i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7) ); // i16x8 lane accesses - expect_vec(i16x8_splat(5), ((i16x8){5, 5, 5, 5, 5, 5, 5, 5})); - expect_vec(i16x8_splat(65537), ((i16x8){1, 1, 1, 1, 1, 1, 1, 1})); - expect_eq(i16x8_extract_lane_s_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), -1); - expect_eq(i16x8_extract_lane_s_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), -1); + expect_vec(i16x8_splat(5), i16x8(5, 5, 5, 5, 5, 5, 5, 5)); + expect_vec(i16x8_splat(65537), i16x8(1, 1, 1, 1, 1, 1, 1, 1)); + expect_eq(i16x8_extract_lane_s_first((v128_t)i16x8(-1, 0, 0, 0, 0, 0, 0, 0)), -1); + expect_eq(i16x8_extract_lane_s_last((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, -1)), -1); #ifdef __wasm_unimplemented_simd128__ - expect_eq(i16x8_extract_lane_u_first((i16x8){-1, 0, 0, 0, 0, 0, 0, 0}), 65535); - expect_eq(i16x8_extract_lane_u_last((i16x8){0, 0, 0, 0, 0, 0, 0, -1}), 65535); + expect_eq(i16x8_extract_lane_u_first((v128_t)i16x8(-1, 0, 0, 0, 0, 0, 0, 0)), 65535); + expect_eq(i16x8_extract_lane_u_last((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, -1)), 65535); #endif // __wasm_unimplemented_simd128__ - expect_vec(i16x8_replace_lane_first((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){7, 0, 0, 0, 0, 0, 0, 0})); - expect_vec(i16x8_replace_lane_last((i16x8){0, 0, 0, 0, 0, 0, 0, 0}, 7), ((i16x8){0, 0, 0, 0, 0, 0, 0, 7})); + expect_vec( + i16x8_replace_lane_first((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, 0), 7), + i16x8(7, 0, 0, 0, 0, 0, 0, 0) + ); + expect_vec( + i16x8_replace_lane_last((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, 0), 7), + i16x8(0, 0, 0, 0, 0, 0, 0, 7) + ); // i32x4 lane accesses - expect_vec(i32x4_splat(-5), ((i32x4){-5, -5, -5, -5})); - expect_eq(i32x4_extract_lane_first((i32x4){-5, 0, 0, 0}), -5); - expect_eq(i32x4_extract_lane_last((i32x4){0, 0, 0, -5}), -5); - expect_vec(i32x4_replace_lane_first((i32x4){0, 0, 0, 0}, 53), ((i32x4){53, 0, 0, 0})); - expect_vec(i32x4_replace_lane_last((i32x4){0, 0, 0, 0}, 53), ((i32x4){0, 0, 0, 53})); - + expect_vec(i32x4_splat(-5), i32x4(-5, -5, -5, -5)); + expect_eq(i32x4_extract_lane_first((v128_t)i32x4(-5, 0, 0, 0)), -5); + expect_eq(i32x4_extract_lane_last((v128_t)i32x4(0, 0, 0, -5)), -5); + expect_vec( + i32x4_replace_lane_first((v128_t)i32x4(0, 0, 0, 0), 53), + i32x4(53, 0, 0, 0) + ); + 
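/*
 * Illustrative aside, not part of the patch: a scalar sketch of why the
 * extract_lane tests above expect -1 from the _s forms but 255 / 65535 from
 * the _u forms. The lane holds the bit pattern 0xFF (or 0xFFFF); the signed
 * extract sign-extends it to i32 while the unsigned extract zero-extends it,
 * which plain integer casts model directly.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  int8_t  lane8  = -1;                       /* bit pattern 0xFF   */
  int16_t lane16 = -1;                       /* bit pattern 0xFFFF */
  printf("%d %d\n", (int32_t)lane8,  (int32_t)(uint8_t)lane8);   /* -1 255   */
  printf("%d %d\n", (int32_t)lane16, (int32_t)(uint16_t)lane16); /* -1 65535 */
  return 0;
}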
expect_vec( + i32x4_replace_lane_last((v128_t)i32x4(0, 0, 0, 0), 53), + i32x4(0, 0, 0, 53) + ); #ifdef __wasm_unimplemented_simd128__ // i64x2 lane accesses - expect_vec(i64x2_splat(-5), ((i64x2){-5, -5})); - expect_eq(i64x2_extract_lane_first((i64x2){-5, 0}), -5); - expect_eq(i64x2_extract_lane_last((i64x2){0, -5}), -5); - expect_vec(i64x2_replace_lane_first((i64x2){0, 0}, 53), ((i64x2){53, 0})); - expect_vec(i64x2_replace_lane_last((i64x2){0, 0}, 53), ((i64x2){0, 53})); + expect_vec(i64x2_splat(-5), i64x2(-5, -5)); + expect_eq(i64x2_extract_lane_first((v128_t)i64x2(-5, 0)), -5); + expect_eq(i64x2_extract_lane_last((v128_t)i64x2(0, -5)), -5); + expect_vec(i64x2_replace_lane_first((v128_t)i64x2(0, 0), 53), i64x2(53, 0)); + expect_vec(i64x2_replace_lane_last((v128_t)i64x2(0, 0), 53), i64x2(0, 53)); #endif // __wasm_unimplemented_simd128__ // f32x4 lane accesses - expect_vec(f32x4_splat(-5), ((f32x4){-5, -5, -5, -5})); - expect_eq(f32x4_extract_lane_first((f32x4){-5, 0, 0, 0}), -5); - expect_eq(f32x4_extract_lane_last((f32x4){0, 0, 0, -5}), -5); - expect_vec(f32x4_replace_lane_first((f32x4){0, 0, 0, 0}, 53), ((f32x4){53, 0, 0, 0})); - expect_vec(f32x4_replace_lane_last((f32x4){0, 0, 0, 0}, 53), ((f32x4){0, 0, 0, 53})); + expect_vec(f32x4_splat(-5), f32x4(-5, -5, -5, -5)); + expect_eq(f32x4_extract_lane_first((v128_t)f32x4(-5, 0, 0, 0)), -5); + expect_eq(f32x4_extract_lane_last((v128_t)f32x4(0, 0, 0, -5)), -5); + expect_vec(f32x4_replace_lane_first((v128_t)f32x4(0, 0, 0, 0), 53), f32x4(53, 0, 0, 0)); + expect_vec(f32x4_replace_lane_last((v128_t)f32x4(0, 0, 0, 0), 53), f32x4(0, 0, 0, 53)); #ifdef __wasm_unimplemented_simd128__ // f64x2 lane accesses - expect_vec(f64x2_splat(-5), ((f64x2){-5, -5})); - expect_eq(f64x2_extract_lane_first((f64x2){-5, 0}), -5); - expect_eq(f64x2_extract_lane_last((f64x2){0, -5}), -5); - expect_vec(f64x2_replace_lane_first((f64x2){0, 0}, 53), ((f64x2){53, 0})); - expect_vec(f64x2_replace_lane_last((f64x2){0, 0}, 53), ((f64x2){0, 53})); + expect_vec(f64x2_splat(-5), f64x2(-5, -5)); + expect_eq(f64x2_extract_lane_first((v128_t)f64x2(-5, 0)), -5); + expect_eq(f64x2_extract_lane_last((v128_t)f64x2(0, -5)), -5); + expect_vec(f64x2_replace_lane_first((v128_t)f64x2(0, 0), 53), f64x2(53, 0)); + expect_vec(f64x2_replace_lane_last((v128_t)f64x2(0, 0), 53), f64x2(0, 53)); #endif // __wasm_unimplemented_simd128__ // i8x16 comparisons expect_vec( i8x16_eq( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){-1, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0}) + u8x16(-1, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0) ); expect_vec( i8x16_ne( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1}) + u8x16(0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1) ); expect_vec( i8x16_lt_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)i8x16(0, 127, 
13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){0, 0, 0, -1, 0, -1, -1, 0, 0, 0, -1, 0, 0, -1, -1, 0}) + u8x16(0, 0, 0, -1, 0, -1, -1, 0, 0, 0, -1, 0, 0, -1, -1, 0) ); expect_vec( i8x16_lt_u( - (u8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (u8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)u8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)u8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1}) + u8x16(0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1) ); expect_vec( i8x16_gt_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1}) + u8x16(0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1) ); expect_vec( i8x16_gt_u( - (u8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (u8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)u8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)u8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0}) + u8x16(0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0) ); expect_vec( i8x16_le_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){-1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, 0}) + u8x16(-1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, 0) ); // bugs.chromium.org/p/v8/issues/detail?id=8635 // expect_vec( // i8x16_le_u( - // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + // (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + // (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) // ), - // ((i8x16){-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1}) + // i8x16(-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1) // ); expect_vec( i8x16_ge_s( - (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) ), - ((u8x16){-1, -1, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, 0, -1}) + u8x16(-1, -1, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, 0, -1) ); // expect_vec( // i8x16_ge_u( - // (i8x16){0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42}, - // (i8x16){0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128} + // (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + // 
(v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) // ), - // ((i8x16){-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0}) + // i8x16(-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0) // ); // i16x8 comparisons expect_vec( i16x8_eq( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)i16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)i16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){-1, 0, 0, 0, 0, 0, 0, 0}) + u16x8(-1, 0, 0, 0, 0, 0, 0, 0) ); expect_vec( i16x8_ne( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)i16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)i16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){0, -1, -1, -1, -1, -1, -1, -1}) + u16x8(0, -1, -1, -1, -1, -1, -1, -1) ); expect_vec( i16x8_lt_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)i16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)i16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){0, 0, 0, -1, 0, -1, 0, -1}) + u16x8(0, 0, 0, -1, 0, -1, 0, -1) ); expect_vec( i16x8_lt_u( - (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)u16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)u16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){0, 0, 0, 0, -1, 0, -1, 0}) + u16x8(0, 0, 0, 0, -1, 0, -1, 0) ); expect_vec( i16x8_gt_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)i16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)i16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){0, -1, -1, 0, -1, 0, -1, 0}) + u16x8(0, -1, -1, 0, -1, 0, -1, 0) ); expect_vec( i16x8_gt_u( - (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)u16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)u16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){0, -1, -1, -1, 0, -1, 0, -1}) + u16x8(0, -1, -1, -1, 0, -1, 0, -1) ); expect_vec( i16x8_le_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)i16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)i16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){-1, 0, 0, -1, 0, -1, 0, -1}) + u16x8(-1, 0, 0, -1, 0, -1, 0, -1) ); expect_vec( i16x8_le_u( - (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)u16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)u16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){-1, 0, 0, 0, -1, 0, -1, 0}) + u16x8(-1, 0, 0, 0, -1, 0, -1, 0) ); expect_vec( i16x8_ge_s( - (i16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (i16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)i16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)i16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - ((u16x8){-1, -1, -1, 0, -1, 0, -1, 0}) + u16x8(-1, -1, -1, 0, -1, 0, -1, 0) ); expect_vec( i16x8_ge_u( - (u16x8){0, 32767, 13, -32768, 1, -32767, 42, -25536}, - (u16x8){0, 13, 1, 32767, -32767, 42, -25536, 32767} + (v128_t)u16x8(0, 32767, 13, -32768, 1, -32767, 42, -25536), + (v128_t)u16x8(0, 13, 1, 32767, -32767, 42, -25536, 32767) ), - 
((u16x8){-1, -1, -1, -1, 0, -1, 0, -1}) + u16x8(-1, -1, -1, -1, 0, -1, 0, -1) ); // i342x4 comparisons expect_vec( - i32x4_eq((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){-1, 0, 0, 0}) + i32x4_eq((v128_t)i32x4(0, -1, 53, -7), (v128_t)i32x4(0, 53, -7, -1)), + u32x4(-1, 0, 0, 0) ); expect_vec( - i32x4_ne((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){0, -1, -1, -1}) + i32x4_ne((v128_t)i32x4(0, -1, 53, -7), (v128_t)i32x4(0, 53, -7, -1)), + u32x4(0, -1, -1, -1) ); expect_vec( - i32x4_lt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){0, -1, 0, -1}) + i32x4_lt_s((v128_t)i32x4(0, -1, 53, -7), (v128_t)i32x4(0, 53, -7, -1)), + u32x4(0, -1, 0, -1) ); expect_vec( - i32x4_lt_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){0, 0, -1, -1}) + i32x4_lt_u((v128_t)u32x4(0, -1, 53, -7), (v128_t)u32x4(0, 53, -7, -1)), + u32x4(0, 0, -1, -1) ); expect_vec( - i32x4_gt_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){0, 0, -1, 0}) + i32x4_gt_s((v128_t)i32x4(0, -1, 53, -7), (v128_t)i32x4(0, 53, -7, -1)), + u32x4(0, 0, -1, 0) ); expect_vec( - i32x4_gt_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){0, -1, 0, 0}) + i32x4_gt_u((v128_t)u32x4(0, -1, 53, -7), (v128_t)u32x4(0, 53, -7, -1)), + u32x4(0, -1, 0, 0) ); expect_vec( - i32x4_le_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){-1, -1, 0, -1}) + i32x4_le_s((v128_t)i32x4(0, -1, 53, -7), (v128_t)i32x4(0, 53, -7, -1)), + u32x4(-1, -1, 0, -1) ); expect_vec( - i32x4_le_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){-1, 0, -1, -1}) + i32x4_le_u((v128_t)u32x4(0, -1, 53, -7), (v128_t)u32x4(0, 53, -7, -1)), + u32x4(-1, 0, -1, -1) ); expect_vec( - i32x4_ge_s((i32x4){0, -1, 53, -7}, (i32x4){0, 53, -7, -1}), ((u32x4){-1, 0, -1, 0}) + i32x4_ge_s((v128_t)i32x4(0, -1, 53, -7), (v128_t)i32x4(0, 53, -7, -1)), + u32x4(-1, 0, -1, 0) ); expect_vec( - i32x4_ge_u((u32x4){0, -1, 53, -7}, (u32x4){0, 53, -7, -1}), ((u32x4){-1, -1, 0, 0}) + i32x4_ge_u((v128_t)u32x4(0, -1, 53, -7), (v128_t)u32x4(0, 53, -7, -1)), + u32x4(-1, -1, 0, 0) ); // f32x4 comparisons expect_vec( - f32x4_eq((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){-1, 0, 0, 0}) + f32x4_eq((v128_t)f32x4(0, -1, 1, 0), (v128_t)f32x4(0, 0, -1, 1)), + u32x4(-1, 0, 0, 0) ); expect_vec( - f32x4_ne((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){0, -1, -1, -1}) + f32x4_ne((v128_t)f32x4(0, -1, 1, 0), (v128_t)f32x4(0, 0, -1, 1)), + u32x4(0, -1, -1, -1) ); expect_vec( - f32x4_lt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){0, -1, 0, -1}) + f32x4_lt((v128_t)f32x4(0, -1, 1, 0), (v128_t)f32x4(0, 0, -1, 1)), + u32x4(0, -1, 0, -1) ); expect_vec( - f32x4_gt((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){0, 0, -1, 0}) + f32x4_gt((v128_t)f32x4(0, -1, 1, 0), (v128_t)f32x4(0, 0, -1, 1)), + u32x4(0, 0, -1, 0) ); expect_vec( - f32x4_le((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){-1, -1, 0, -1}) + f32x4_le((v128_t)f32x4(0, -1, 1, 0), (v128_t)f32x4(0, 0, -1, 1)), + u32x4(-1, -1, 0, -1) ); expect_vec( - f32x4_ge((f32x4){0, -1, 1, 0}, (f32x4){0, 0, -1, 1}), ((u32x4){-1, 0, -1, 0}) + f32x4_ge((v128_t)i32x4(0, -1, 1, 0), (v128_t)f32x4(0, 0, -1, 1)), + u32x4(-1, 0, -1, 0) ); expect_vec( - f32x4_eq((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((u32x4){0, 0, 0, -1}) + f32x4_eq((v128_t)f32x4(NAN, 0, NAN, INFINITY), (v128_t)f32x4(0, NAN, NAN, INFINITY)), + u32x4(0, 0, 0, -1) ); expect_vec( - f32x4_ne((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((u32x4){-1, -1, -1, 0}) + 
f32x4_ne((v128_t)f32x4(NAN, 0, NAN, INFINITY), (v128_t)f32x4(0, NAN, NAN, INFINITY)), + u32x4(-1, -1, -1, 0) ); expect_vec( - f32x4_lt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((u32x4){0, 0, 0, 0}) + f32x4_lt((v128_t)f32x4(NAN, 0, NAN, INFINITY), (v128_t)f32x4(0, NAN, NAN, INFINITY)), + u32x4(0, 0, 0, 0) ); expect_vec( - f32x4_gt((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((u32x4){0, 0, 0, 0}) + f32x4_gt((v128_t)f32x4(NAN, 0, NAN, INFINITY), (v128_t)f32x4(0, NAN, NAN, INFINITY)), + u32x4(0, 0, 0, 0) ); expect_vec( - f32x4_le((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((u32x4){0, 0, 0, -1}) + f32x4_le((v128_t)f32x4(NAN, 0, NAN, INFINITY), (v128_t)f32x4(0, NAN, NAN, INFINITY)), + u32x4(0, 0, 0, -1) ); expect_vec( - f32x4_ge((f32x4){NAN, 0, NAN, INFINITY}, (f32x4){0, NAN, NAN, INFINITY}), - ((u32x4){0, 0, 0, -1}) + f32x4_ge((v128_t)f32x4(NAN, 0, NAN, INFINITY), (v128_t)f32x4(0, NAN, NAN, INFINITY)), + u32x4(0, 0, 0, -1) ); expect_vec( - f32x4_eq((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((u32x4){0, 0, 0, 0}) + f32x4_eq((v128_t)f32x4(-INFINITY, 0, NAN, -INFINITY), (v128_t)f32x4(0, INFINITY, INFINITY, NAN)), + u32x4(0, 0, 0, 0) ); expect_vec( - f32x4_ne((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((u32x4){-1, -1, -1, -1}) + f32x4_ne((v128_t)f32x4(-INFINITY, 0, NAN, -INFINITY), (v128_t)f32x4(0, INFINITY, INFINITY, NAN)), + u32x4(-1, -1, -1, -1) ); expect_vec( - f32x4_lt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((u32x4){-1, -1, 0, 0}) + f32x4_lt((v128_t)f32x4(-INFINITY, 0, NAN, -INFINITY), (v128_t)f32x4(0, INFINITY, INFINITY, NAN)), + u32x4(-1, -1, 0, 0) ); expect_vec( - f32x4_gt((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((u32x4){0, 0, 0, 0}) + f32x4_gt((v128_t)f32x4(-INFINITY, 0, NAN, -INFINITY), (v128_t)f32x4(0, INFINITY, INFINITY, NAN)), + u32x4(0, 0, 0, 0) ); expect_vec( - f32x4_le((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((u32x4){-1, -1, 0, 0}) + f32x4_le((v128_t)f32x4(-INFINITY, 0, NAN, -INFINITY), (v128_t)f32x4(0, INFINITY, INFINITY, NAN)), + u32x4(-1, -1, 0, 0) ); expect_vec( - f32x4_ge((f32x4){-INFINITY, 0, NAN, -INFINITY}, (f32x4){0, INFINITY, INFINITY, NAN}), - ((u32x4){0, 0, 0, 0}) + f32x4_ge((v128_t)f32x4(-INFINITY, 0, NAN, -INFINITY), (v128_t)f32x4(0, INFINITY, INFINITY, NAN)), + u32x4(0, 0, 0, 0) ); #ifdef __wasm_undefined_simd128__ // f64x2 comparisons - expect_vec(f64x2_eq((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, 0})); - expect_vec(f64x2_ne((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, -1})); - expect_vec(f64x2_lt((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, 0})); - expect_vec(f64x2_gt((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){0, -1})); - expect_vec(f64x2_le((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, 0})); - expect_vec(f64x2_ge((f64x2){0, 1}, (f64x2){0, 0}), ((u64x2){-1, -1})); - expect_vec(f64x2_eq((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); - expect_vec(f64x2_ne((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){-1, -1})); - expect_vec(f64x2_lt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, -1})); - expect_vec(f64x2_gt((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); - expect_vec(f64x2_le((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, -1})); - expect_vec(f64x2_ge((f64x2){NAN, 0}, (f64x2){INFINITY, INFINITY}), ((u64x2){0, 0})); + 
expect_vec(f64x2_eq((v128_t)f64x2(0, 1), (v128_t)f64x2(0, 0)), u64x2(-1, 0)); + expect_vec(f64x2_ne((v128_t)f64x2(0, 1), (v128_t)f64x2(0, 0)), u64x2(0, -1)); + expect_vec(f64x2_lt((v128_t)f64x2(0, 1), (v128_t)f64x2(0, 0)), u64x2(0, 0)); + expect_vec(f64x2_gt((v128_t)f64x2(0, 1), (v128_t)f64x2(0, 0)), u64x2(0, -1)); + expect_vec(f64x2_le((v128_t)f64x2(0, 1), (v128_t)f64x2(0, 0)), u64x2(-1, 0)); + expect_vec(f64x2_ge((v128_t)f64x2(0, 1), (v128_t)f64x2(0, 0)), u64x2(-1, -1)); + expect_vec(f64x2_eq((v128_t)f64x2(NAN, 0), (v128_t)f64x2(INFINITY, INFINITY)), u64x2(0, 0)); + expect_vec(f64x2_ne((v128_t)f64x2(NAN, 0), (v128_t)f64x2(INFINITY, INFINITY)), u64x2(-1, -1)); + expect_vec(f64x2_lt((v128_t)f64x2(NAN, 0), (v128_t)f64x2(INFINITY, INFINITY)), u64x2(0, -1)); + expect_vec(f64x2_gt((v128_t)f64x2(NAN, 0), (v128_t)f64x2(INFINITY, INFINITY)), u64x2(0, 0)); + expect_vec(f64x2_le((v128_t)f64x2(NAN, 0), (v128_t)f64x2(INFINITY, INFINITY)), u64x2(0, -1)); + expect_vec(f64x2_ge((v128_t)f64x2(NAN, 0), (v128_t)f64x2(INFINITY, INFINITY)), u64x2(0, 0)); #endif // __wasm_undefined_simd128__ // bitwise operations - expect_vec(v128_not((v128)(i32x4){0, -1, 0, -1}), (v128)((i32x4){-1, 0, -1, 0})); + expect_vec(v128_not((v128_t)i32x4(0, -1, 0, -1)), (v128_t)i32x4(-1, 0, -1, 0)); expect_vec( - v128_and((v128)(i32x4){0, 0, -1, -1}, (v128)(i32x4){0, -1, 0, -1}), - (v128)((i32x4){0, 0, 0, -1}) + v128_and((v128_t)i32x4(0, 0, -1, -1), (v128_t)i32x4(0, -1, 0, -1)), + i32x4(0, 0, 0, -1) ); expect_vec( - v128_or((v128)(i32x4){0, 0, -1, -1}, (v128)(i32x4){0, -1, 0, -1}), - (v128)((i32x4){0, -1, -1, -1}) + v128_or((v128_t)i32x4(0, 0, -1, -1), (v128_t)i32x4(0, -1, 0, -1)), + i32x4(0, -1, -1, -1) ); expect_vec( - v128_xor((v128)(i32x4){0, 0, -1, -1}, (v128)(i32x4){0, -1, 0, -1}), - (v128)((i32x4){0, -1, -1, 0}) + v128_xor((v128_t)i32x4(0, 0, -1, -1), (v128_t)i32x4(0, -1, 0, -1)), + i32x4(0, -1, -1, 0) ); expect_vec( v128_bitselect( - (v128)(i32x4){0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA}, - (v128)(i32x4){0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB}, - (v128)(i32x4){0xF0F0F0F0, 0xFFFFFFFF, 0x00000000, 0xFF00FF00} + (v128_t)i32x4(0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA), + (v128_t)i32x4(0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB, 0xBBBBBBBB), + (v128_t)i32x4(0xF0F0F0F0, 0xFFFFFFFF, 0x00000000, 0xFF00FF00) ), - (v128)((i32x4){0xABABABAB, 0xAAAAAAAA, 0xBBBBBBBB, 0xAABBAABB}) + i32x4(0xABABABAB, 0xAAAAAAAA, 0xBBBBBBBB, 0xAABBAABB) ); // i8x16 arithmetic expect_vec( - i8x16_neg((i8x16){0, 1, 42, -3, -56, 127, -128, -126, 0, -1, -42, 3, 56, -127, -128, 126}), - ((i8x16){0, -1, -42, 3, 56, -127, -128, 126, 0, 1, 42, -3, -56, 127, -128, -126}) + i8x16_neg((v128_t)i8x16(0, 1, 42, -3, -56, 127, -128, -126, 0, -1, -42, 3, 56, -127, -128, 126)), + i8x16(0, -1, -42, 3, 56, -127, -128, 126, 0, 1, 42, -3, -56, 127, -128, -126) ); - expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i8x16_any_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 1); - expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_eq(i8x16_any_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 0); - expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 0); - expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + 
expect_eq(i8x16_any_true((v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), 0); + expect_eq(i8x16_any_true((v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0)), 1); + expect_eq(i8x16_any_true((v128_t)i8x16(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 1); + expect_eq(i8x16_any_true((v128_t)i8x16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 1); + expect_eq(i8x16_all_true((v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), 0); + expect_eq(i8x16_all_true((v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0)), 0); + expect_eq(i8x16_all_true((v128_t)i8x16(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 0); + expect_eq(i8x16_all_true((v128_t)i8x16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 1); expect_vec( - i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), - ((i8x16){0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128}) + i8x16_shl((v128_t)i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 1), + i8x16(0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128) ); expect_vec( - i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), - ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + i8x16_shl((v128_t)i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 8), + i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64) ); expect_vec( - i8x16_shr_u((u8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), - ((u8x16){0, 0, 1, 2, 4, 8, 16, 32, 64, 1, 3, 6, 12, 24, 48, 96}) + i8x16_shr_u((v128_t)u8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 1), + u8x16(0, 0, 1, 2, 4, 8, 16, 32, 64, 1, 3, 6, 12, 24, 48, 96) ); expect_vec( - i8x16_shr_u((u8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), - ((u8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + i8x16_shr_u((v128_t)u8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 8), + u8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64) ); expect_vec( - i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), - ((i8x16){0, 0, 1, 2, 4, 8, 16, 32, -64, 1, 3, 6, 12, 24, 48, -32}) + i8x16_shr_s((v128_t)i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 1), + i8x16(0, 0, 1, 2, 4, 8, 16, 32, -64, 1, 3, 6, 12, 24, 48, -32) ); expect_vec( - i8x16_shr_s((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 8), - ((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}) + i8x16_shr_s((v128_t)i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 8), + i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64) ); expect_vec( i8x16_add( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)i8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + (v128_t)i8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((i8x16){3, 17, 0, 0, 0, 135, 109, 46, 145, 225, 48, 184, 17, 249, 128, 215}) + i8x16(3, 17, 0, 0, 0, 135, 109, 46, 145, 225, 48, 184, 17, 249, 128, 215) ); expect_vec( i8x16_add_saturate_s( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)i8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + 
(v128_t)i8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((i8x16){3, 17, 0, 128, 0, 135, 109, 46, 127, 225, 48, 184, 17, 249, 127, 215}) + i8x16(3, 17, 0, 128, 0, 135, 109, 46, 127, 225, 48, 184, 17, 249, 127, 215) ); expect_vec( i8x16_add_saturate_u( - (u8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (u8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)u8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + (v128_t)u8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((u8x16){3, 255, 255, 255, 255, 135, 109, 46, 145, 225, 255, 184, 17, 255, 128, 215}) + u8x16(3, 255, 255, 255, 255, 135, 109, 46, 145, 225, 255, 184, 17, 255, 128, 215) ); expect_vec( i8x16_sub( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)i8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + (v128_t)i8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((i8x16){253, 67, 254, 0, 254, 123, 159, 12, 61, 167, 158, 100, 17, 251, 130, 187}) + i8x16(253, 67, 254, 0, 254, 123, 159, 12, 61, 167, 158, 100, 17, 251, 130, 187) ); expect_vec( i8x16_sub_saturate_s( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)i8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + (v128_t)i8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((i8x16){253, 67, 254, 0, 127, 128, 159, 12, 61, 167, 158, 128, 17, 251, 130, 127}) + i8x16(253, 67, 254, 0, 127, 128, 159, 12, 61, 167, 158, 128, 17, 251, 130, 127) ); expect_vec( i8x16_sub_saturate_u( - (u8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (u8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)u8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + (v128_t)u8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((u8x16){0, 0, 254, 0, 0, 123, 0, 12, 61, 167, 158, 100, 17, 0, 0, 0}) + u8x16(0, 0, 254, 0, 0, 123, 0, 12, 61, 167, 158, 100, 17, 0, 0, 0) ); expect_vec( i8x16_mul( - (i8x16){0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73}, - (i8x16){3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142} + (v128_t)i8x16(0, 42, 255, 128, 127, 129, 6, 29, 103, 196, 231, 142, 17, 250, 1, 73), + (v128_t)i8x16(3, 231, 1, 128, 129, 6, 103, 17, 42, 29, 73, 42, 0, 255, 127, 142) ), - ((i8x16){0, 230, 255, 0, 255, 6, 106, 237, 230, 52, 223, 76, 0, 6, 127, 126}) + i8x16(0, 230, 255, 0, 255, 6, 106, 237, 230, 52, 223, 76, 0, 6, 127, 126) ); // i16x8 arithmetic expect_vec( - i16x8_neg((i16x8){0, 1, 42, -3, -56, 32767, -32768, 32766}), - ((i16x8){0, -1, -42, 3, 56, -32767, -32768, -32766}) + i16x8_neg((v128_t)i16x8(0, 1, 42, -3, -56, 32767, -32768, 32766)), + i16x8(0, -1, -42, 3, 56, -32767, -32768, -32766) ); - expect_eq(i16x8_any_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i16x8_any_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 1); - expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 1); - expect_eq(i16x8_any_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); - expect_eq(i16x8_all_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); - expect_eq(i16x8_all_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 0); - 
expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 0); - expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); + expect_eq(i16x8_any_true((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, 0)), 0); + expect_eq(i16x8_any_true((v128_t)i16x8(0, 0, 1, 0, 0, 0, 0, 0)), 1); + expect_eq(i16x8_any_true((v128_t)i16x8(1, 1, 1, 1, 1, 0, 1, 1)), 1); + expect_eq(i16x8_any_true((v128_t)i16x8(1, 1, 1, 1, 1, 1, 1, 1)), 1); + expect_eq(i16x8_all_true((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, 0)), 0); + expect_eq(i16x8_all_true((v128_t)i16x8(0, 0, 1, 0, 0, 0, 0, 0)), 0); + expect_eq(i16x8_all_true((v128_t)i16x8(1, 1, 1, 1, 1, 0, 1, 1)), 0); + expect_eq(i16x8_all_true((v128_t)i16x8(1, 1, 1, 1, 1, 1, 1, 1)), 1); expect_vec( - i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), - ((i16x8){0, 16, 32, 256, 512, 4096, 8192, 0}) + i16x8_shl((v128_t)i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 1), + i16x8(0, 16, 32, 256, 512, 4096, 8192, 0) ); expect_vec( - i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), - ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + i16x8_shl((v128_t)i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 16), + i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768) ); expect_vec( - i16x8_shr_u((u16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), - ((u16x8){0, 4, 8, 64, 128, 1024, 2048, 16384}) + i16x8_shr_u((v128_t)u16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 1), + u16x8(0, 4, 8, 64, 128, 1024, 2048, 16384) ); expect_vec( - i16x8_shr_u((u16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), - ((u16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + i16x8_shr_u((v128_t)u16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 16), + u16x8(0, 8, 16, 128, 256, 2048, 4096, -32768) ); expect_vec( - i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), - ((i16x8){0, 4, 8, 64, 128, 1024, 2048, -16384}) + i16x8_shr_s((v128_t)i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 1), + i16x8(0, 4, 8, 64, 128, 1024, 2048, -16384) ); expect_vec( - i16x8_shr_s((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 16), - ((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}) + i16x8_shr_s((v128_t)i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 16), + i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768) ); expect_vec( i16x8_add( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + (v128_t)i16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)i16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((i16x8){768, -255, 0, 0, -30976, 12288, -1792, -32768}) + i16x8(768, -255, 0, 0, -30976, 12288, -1792, -32768) ); expect_vec( i16x8_add_saturate_s( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + (v128_t)i16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)i16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((i16x8){768, -255, -32768, 0, -30976, 12288, -1792, 32767}) + i16x8(768, -255, -32768, 0, -30976, 12288, -1792, 32767) ); expect_vec( i16x8_add_saturate_u( - (u16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (u16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + (v128_t)u16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)u16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((u16x8){768, -255, -1, -1, -30976, -1, -1, -32768}) + u16x8(768, -255, -1, -1, -30976, -1, -1, -32768) ); expect_vec( i16x8_sub( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 
2} + (v128_t)i16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)i16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((i16x8){-768, -257, 0, -512, 31488, -25088, -1280, 32764}) + i16x8(-768, -257, 0, -512, 31488, -25088, -1280, 32764) ); expect_vec( i16x8_sub_saturate_s( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + (v128_t)i16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)i16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((i16x8){-768, -257, 0, 32767, -32768, -25088, -1280, 32764}) + i16x8(-768, -257, 0, 32767, -32768, -25088, -1280, 32764) ); expect_vec( i16x8_sub_saturate_u( - (u16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (u16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + (v128_t)u16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)u16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((u16x8){0, -257, 0, 0, 31488, -25088, 0, 32764}) + u16x8(0, -257, 0, 0, 31488, -25088, 0, 32764) ); expect_vec( i16x8_mul( - (i16x8){0, -256, -32768, 32512, -32512, -6400, -1536, 32766}, - (i16x8){768, 1, -32768, -32512, 1536, 18688, -256, 2} + (v128_t)i16x8(0, -256, -32768, 32512, -32512, -6400, -1536, 32766), + (v128_t)i16x8(768, 1, -32768, -32512, 1536, 18688, -256, 2) ), - ((i16x8){0, -256, 0, 0, 0, 0, 0, -4}) + i16x8(0, -256, 0, 0, 0, 0, 0, -4) ); // i32x4 arithmetic - expect_vec(i32x4_neg((i32x4){0, 1, 0x80000000, 0x7fffffff}), ((i32x4){0, -1, 0x80000000, 0x80000001})); - expect_eq(i32x4_any_true((i32x4){0, 0, 0, 0}), 0); - expect_eq(i32x4_any_true((i32x4){0, 0, 1, 0}), 1); - expect_eq(i32x4_any_true((i32x4){1, 0, 1, 1}), 1); - expect_eq(i32x4_any_true((i32x4){1, 1, 1, 1}), 1); - expect_eq(i32x4_all_true((i32x4){0, 0, 0, 0}), 0); - expect_eq(i32x4_all_true((i32x4){0, 0, 1, 0}), 0); - expect_eq(i32x4_all_true((i32x4){1, 0, 1, 1}), 0); - expect_eq(i32x4_all_true((i32x4){1, 1, 1, 1}), 1); - expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){2, 0x80000000, 0, -2})); - expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); - expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0xc0000000, -1})); - expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); - expect_vec(i32x4_shr_u((u32x4){1, 0x40000000, 0x80000000, -1}, 1), ((u32x4){0, 0x20000000, 0x40000000, 0x7fffffff})); - expect_vec(i32x4_shr_u((u32x4){1, 0x40000000, 0x80000000, -1}, 32), ((u32x4){1, 0x40000000, 0x80000000, -1})); - expect_vec(i32x4_add((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 5, 42}), ((i32x4){0, 2, 47, 47})); - expect_vec(i32x4_sub((i32x4){0, 2, 47, 47}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 0x80000001, 5, 42})); - expect_vec(i32x4_mul((i32x4){0, 0x80000001, 42, 5}, (i32x4){0, 0x80000001, 42, 5}), ((i32x4){0, 1, 1764, 25})); - + expect_vec( + i32x4_neg((v128_t)i32x4(0, 1, 0x80000000, 0x7fffffff)), + i32x4(0, -1, 0x80000000, 0x80000001) + ); + expect_eq(i32x4_any_true((v128_t)i32x4(0, 0, 0, 0)), 0); + expect_eq(i32x4_any_true((v128_t)i32x4(0, 0, 1, 0)), 1); + expect_eq(i32x4_any_true((v128_t)i32x4(1, 0, 1, 1)), 1); + expect_eq(i32x4_any_true((v128_t)i32x4(1, 1, 1, 1)), 1); + expect_eq(i32x4_all_true((v128_t)i32x4(0, 0, 0, 0)), 0); + expect_eq(i32x4_all_true((v128_t)i32x4(0, 0, 1, 0)), 0); + expect_eq(i32x4_all_true((v128_t)i32x4(1, 0, 1, 1)), 0); + 
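/*
 * Illustrative aside, not part of the patch: the shift tests in this file
 * expect a vector to come back unchanged when the shift amount equals the
 * lane width (8 for i8x16, 16 for i16x8, 32 for i32x4, 64 for i64x2), which
 * is consistent with the shift count being reduced modulo the lane width.
 * A scalar model of a single i32 lane:
 */
#include <stdint.h>
#include <stdio.h>

static int32_t shl_i32_lane(int32_t x, int shift) {
  return x << (shift & 31);  /* count taken mod 32, so shifting by 32 is the identity */
}

int main(void) {
  printf("%d %d\n", shl_i32_lane(1, 1), shl_i32_lane(1, 32));  /* prints: 2 1 */
  return 0;
}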
expect_eq(i32x4_all_true((v128_t)i32x4(1, 1, 1, 1)), 1); + expect_vec( + i32x4_shl((v128_t)i32x4(1, 0x40000000, 0x80000000, -1), 1), + i32x4(2, 0x80000000, 0, -2) + ); + expect_vec( + i32x4_shl((v128_t)i32x4(1, 0x40000000, 0x80000000, -1), 32), + i32x4(1, 0x40000000, 0x80000000, -1) + ); + expect_vec( + i32x4_shr_s((v128_t)i32x4(1, 0x40000000, 0x80000000, -1), 1), + i32x4(0, 0x20000000, 0xc0000000, -1) + ); + expect_vec( + i32x4_shr_s((v128_t)i32x4(1, 0x40000000, 0x80000000, -1), 32), + i32x4(1, 0x40000000, 0x80000000, -1) + ); + expect_vec( + i32x4_shr_u((v128_t)u32x4(1, 0x40000000, 0x80000000, -1), 1), + u32x4(0, 0x20000000, 0x40000000, 0x7fffffff) + ); + expect_vec( + i32x4_shr_u((v128_t)u32x4(1, 0x40000000, 0x80000000, -1), 32), + u32x4(1, 0x40000000, 0x80000000, -1) + ); + expect_vec( + i32x4_add((v128_t)i32x4(0, 0x80000001, 42, 5), (v128_t)i32x4(0, 0x80000001, 5, 42)), + i32x4(0, 2, 47, 47) + ); + expect_vec( + i32x4_sub((v128_t)i32x4(0, 2, 47, 47), (v128_t)i32x4(0, 0x80000001, 42, 5)), + i32x4(0, 0x80000001, 5, 42) + ); + expect_vec( + i32x4_mul((v128_t)i32x4(0, 0x80000001, 42, 5), (v128_t)i32x4(0, 0x80000001, 42, 5)), + i32x4(0, 1, 1764, 25) + ); #ifdef __wasm_unimplemented_simd128__ // i64x2 arithmetic - expect_vec(i64x2_neg((i64x2){0x8000000000000000, 42}), ((i64x2){0x8000000000000000, -42})); - expect_eq(i64x2_any_true((i64x2){0, 0}), 0); - expect_eq(i64x2_any_true((i64x2){1, 0}), 1); - expect_eq(i64x2_any_true((i64x2){1, 1}), 1); - expect_eq(i64x2_all_true((i64x2){0, 0}), 0); - expect_eq(i64x2_all_true((i64x2){1, 0}), 0); - expect_eq(i64x2_all_true((i64x2){1, 1}), 1); - - expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 1), ((i64x2){2, 0})); - expect_vec(i64x2_shl((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); - expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 1), ((i64x2){0, 0xc000000000000000})); - expect_vec(i64x2_shr_s((i64x2){1, 0x8000000000000000}, 64), ((i64x2){1, 0x8000000000000000})); - expect_vec(i64x2_shr_u((u64x2){1, 0x8000000000000000}, 1), ((u64x2){0, 0x4000000000000000})); - expect_vec(i64x2_shr_u((u64x2){1, 0x8000000000000000}, 64), ((u64x2){1, 0x8000000000000000})); - expect_vec(i64x2_add((i64x2){0x8000000000000001, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){2, 42})); - expect_vec(i64x2_sub((i64x2){2, 42}, (i64x2){0x8000000000000001, 0}), ((i64x2){0x8000000000000001, 42})); + expect_vec(i64x2_neg((v128_t)i64x2(0x8000000000000000, 42)), i64x2(0x8000000000000000, -42)); + expect_eq(i64x2_any_true((v128_t)i64x2(0, 0)), 0); + expect_eq(i64x2_any_true((v128_t)i64x2(1, 0)), 1); + expect_eq(i64x2_any_true((v128_t)i64x2(1, 1)), 1); + expect_eq(i64x2_all_true((v128_t)i64x2(0, 0)), 0); + expect_eq(i64x2_all_true((v128_t)i64x2(1, 0)), 0); + expect_eq(i64x2_all_true((v128_t)i64x2(1, 1)), 1); + + expect_vec(i64x2_shl((v128_t)i64x2(1, 0x8000000000000000), 1), i64x2(2, 0)); + expect_vec(i64x2_shl((v128_t)i64x2(1, 0x8000000000000000), 64), i64x2(1, 0x8000000000000000)); + expect_vec(i64x2_shr_s((v128_t)i64x2(1, 0x8000000000000000), 1), i64x2(0, 0xc000000000000000)); + expect_vec(i64x2_shr_s((v128_t)i64x2(1, 0x8000000000000000), 64), i64x2(1, 0x8000000000000000)); + expect_vec(i64x2_shr_u((v128_t)u64x2(1, 0x8000000000000000), 1), u64x2(0, 0x4000000000000000)); + expect_vec(i64x2_shr_u((v128_t)u64x2(1, 0x8000000000000000), 64), u64x2(1, 0x8000000000000000)); + expect_vec( + i64x2_add((v128_t)i64x2(0x8000000000000001, 42), (v128_t)i64x2(0x8000000000000001, 0)), + i64x2(2, 42) + ); + expect_vec( + i64x2_sub((v128_t)i64x2(2, 42), 
(v128_t)i64x2(0x8000000000000001, 0)), + i64x2(0x8000000000000001, 42) + ); #endif // __wasm_unimplemented_simd128__ // f32x4 arithmetic - expect_vec(f32x4_abs((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, NAN, INFINITY, 5})); - expect_vec(f32x4_neg((f32x4){-0., NAN, -INFINITY, 5}), ((f32x4){0, -NAN, INFINITY, -5})); + expect_vec(f32x4_abs((v128_t)f32x4(-0., NAN, -INFINITY, 5)), f32x4(0, NAN, INFINITY, 5)); + expect_vec(f32x4_neg((v128_t)f32x4(-0., NAN, -INFINITY, 5)), f32x4(0, -NAN, INFINITY, -5)); #ifdef __wasm_unimplemented_simd128__ - expect_vec(f32x4_sqrt((f32x4){-0., NAN, INFINITY, 4}), ((f32x4){-0., NAN, INFINITY, 2})); + expect_vec(f32x4_sqrt((v128_t)f32x4(0., NAN, INFINITY, 4)), f32x4(-0., NAN, INFINITY, 2)); #endif // __wasm_unimplemented_simd128__ - expect_vec(f32x4_add((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 43})); - expect_vec(f32x4_sub((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, -INFINITY, 1}), ((f32x4){NAN, -NAN, INFINITY, 41})); - expect_vec(f32x4_mul((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, INFINITY, 2}), ((f32x4){NAN, -NAN, INFINITY, 84})); - expect_vec(f32x4_div((f32x4){NAN, -NAN, INFINITY, 42}, (f32x4){42, INFINITY, 2, 2}), ((f32x4){NAN, -NAN, INFINITY, 21})); - // expect_vec(f32x4_min((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){-0., -0., NAN, NAN})); - // expect_vec(f32x4_max((f32x4){-0., 0, NAN, 5}, (f32x4){0, -0., 5, NAN}), ((f32x4){0, 0, NAN, NAN})); + expect_vec( + f32x4_add((v128_t)f32x4(NAN, -NAN, INFINITY, 42), (v128_t)f32x4(42, INFINITY, INFINITY, 1)), + f32x4(NAN, -NAN, INFINITY, 43) + ); + expect_vec( + f32x4_sub((v128_t)f32x4(NAN, -NAN, INFINITY, 42), (v128_t)f32x4(42, INFINITY, -INFINITY, 1)), + f32x4(NAN, -NAN, INFINITY, 41) + ); + expect_vec( + f32x4_mul((v128_t)f32x4(NAN, -NAN, INFINITY, 42), (v128_t)f32x4(42, INFINITY, INFINITY, 2)), + f32x4(NAN, -NAN, INFINITY, 84) + ); + expect_vec( + f32x4_div((v128_t)f32x4(NAN, -NAN, INFINITY, 42), (v128_t)f32x4(42, INFINITY, 2, 2)), + f32x4(NAN, -NAN, INFINITY, 21) + ); + expect_vec( + f32x4_min((v128_t)f32x4(-0., 0, NAN, 5), (v128_t)f32x4(0, -0., 5, NAN)), + f32x4(-0., -0., NAN, NAN) + ); + expect_vec( + f32x4_max((v128_t)f32x4(-0., 0, NAN, 5), (v128_t)f32x4(0, -0., 5, NAN)), + f32x4(0, 0, NAN, NAN) + ); #ifdef __wasm_unimplemented_simd128__ // f64x2 arithmetic - expect_vec(f64x2_abs((f64x2){-0., NAN}), ((f64x2){0, NAN})); - expect_vec(f64x2_abs((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, 5})); + expect_vec(f64x2_abs((v128_t)f64x2(-0., NAN)), f64x2(0, NAN)); + expect_vec(f64x2_abs((v128_t)f64x2(-INFINITY, 5)), f64x2(INFINITY, 5)); - expect_vec(f64x2_neg((f64x2){-0., NAN}), ((f64x2){0, -NAN})); - expect_vec(f64x2_neg((f64x2){-INFINITY, 5}), ((f64x2){INFINITY, -5})); + expect_vec(f64x2_neg((v128_t)f64x2(-0., NAN)), f64x2(0, -NAN)); + expect_vec(f64x2_neg((v128_t)f64x2(-INFINITY, 5)), f64x2(INFINITY, -5)); - expect_vec(f64x2_sqrt((f64x2){-0., NAN}), ((f64x2){-0., NAN})); - expect_vec(f64x2_sqrt((f64x2){INFINITY, 4}), ((f64x2){INFINITY, 2})); + expect_vec(f64x2_sqrt((v128_t)f64x2(-0., NAN)), f64x2(-0., NAN)); + expect_vec(f64x2_sqrt((v128_t)f64x2(INFINITY, 4)), f64x2(INFINITY, 2)); - expect_vec(f64x2_add((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_add((f64x2){INFINITY, 42}, (f64x2){INFINITY, 1}), ((f64x2){INFINITY, 43})); - expect_vec(f64x2_sub((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_sub((f64x2){INFINITY, 42}, (f64x2){-INFINITY, 
1}), ((f64x2){INFINITY, 41})); - expect_vec(f64x2_mul((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_mul((f64x2){INFINITY, 42}, (f64x2){INFINITY, 2}), ((f64x2){INFINITY, 84})); - expect_vec(f64x2_div((f64x2){NAN, -NAN}, (f64x2){42, INFINITY}), ((f64x2){NAN, -NAN})); - expect_vec(f64x2_div((f64x2){INFINITY, 42}, (f64x2){2, 2}), ((f64x2){INFINITY, 21})); + expect_vec( + f64x2_add((v128_t)f64x2(NAN, -NAN), (v128_t)f64x2(42, INFINITY)), + f64x2(NAN, -NAN) + ); + expect_vec( + f64x2_add((v128_t)f64x2(INFINITY, 42), (v128_t)f64x2(INFINITY, 1)), + f64x2(INFINITY, 43) + ); + expect_vec( + f64x2_sub((v128_t)f64x2(NAN, -NAN), (v128_t)f64x2(42, INFINITY)), + f64x2(NAN, -NAN) + ); + expect_vec( + f64x2_sub((v128_t)f64x2(INFINITY, 42), (v128_t)f64x2(-INFINITY, 1)), + f64x2(INFINITY, 41) + ); + expect_vec( + f64x2_mul((v128_t)f64x2(NAN, -NAN), (v128_t)f64x2(42, INFINITY)), + f64x2(NAN, -NAN) + ); + expect_vec( + f64x2_mul((v128_t)f64x2(INFINITY, 42), (v128_t)f64x2(INFINITY, 2)), + f64x2(INFINITY, 84) + ); + expect_vec( + f64x2_div((v128_t)f64x2(NAN, -NAN), (v128_t)f64x2(42, INFINITY)), + f64x2(NAN, -NAN) + ); + expect_vec( + f64x2_div((v128_t)f64x2(INFINITY, 42), (v128_t)f64x2(2, 2)), + f64x2(INFINITY, 21) + ); - expect_vec(f64x2_min((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){-0., -0})); - expect_vec(f64x2_min((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); - expect_vec(f64x2_max((f64x2){-0., 0}, (f64x2){0, -0}), ((f64x2){0, 0})); - expect_vec(f64x2_max((f64x2){NAN, 5}, (f64x2){5, NAN}), ((f64x2){NAN, NAN})); + expect_vec(f64x2_min((v128_t)f64x2(-0., 0), (v128_t)f64x2(0, -0)), f64x2(-0., -0)); + expect_vec(f64x2_min((v128_t)f64x2(NAN, 5), (v128_t)f64x2(5, NAN)), f64x2(NAN, NAN)); + expect_vec(f64x2_max((v128_t)f64x2(-0., 0), (v128_t)f64x2(0, -0)), f64x2(0, 0)); + expect_vec(f64x2_max((v128_t)f64x2(NAN, 5), (v128_t)f64x2(5, NAN)), f64x2(NAN, NAN)); #endif // __wasm_unimplemented_simd128__ // conversions - expect_vec(i32x4_trunc_s_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((i32x4){42, 0, 2147483647, -2147483648ll})); - expect_vec(i32x4_trunc_u_f32x4_sat((f32x4){42, NAN, INFINITY, -INFINITY}), ((u32x4){42, 0, 4294967295ull, 0})); + expect_vec( + i32x4_trunc_s_f32x4_sat((v128_t)f32x4(42, NAN, INFINITY, -INFINITY)), + i32x4(42, 0, 2147483647, -2147483648ll) + ); + expect_vec( + i32x4_trunc_u_f32x4_sat((v128_t)f32x4(42, NAN, INFINITY, -INFINITY)), + u32x4(42, 0, 4294967295ull, 0) + ); #ifdef __wasm_unimplemented_simd128__ - expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){42, NAN}), ((i64x2){42, 0})); - expect_vec(i64x2_trunc_s_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((i64x2){9223372036854775807ll, -9223372036854775807ll - 1})); - expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){42, NAN}), ((u64x2){42, 0})); - expect_vec(i64x2_trunc_u_f64x2_sat((f64x2){INFINITY, -INFINITY}), ((u64x2){18446744073709551615ull, 0})); + expect_vec(i64x2_trunc_s_f64x2_sat((v128_t)f64x2(42, NAN)), i64x2(42, 0)); + expect_vec( + i64x2_trunc_s_f64x2_sat((v128_t)f64x2(INFINITY, -INFINITY)), + i64x2(9223372036854775807ll, -9223372036854775807ll - 1) + ); + expect_vec(i64x2_trunc_u_f64x2_sat((v128_t)f64x2(42, NAN)), u64x2(42, 0)); + expect_vec( + i64x2_trunc_u_f64x2_sat((v128_t)f64x2(INFINITY, -INFINITY)), + u64x2(18446744073709551615ull, 0) + ); #endif // __wasm_unimplemented_simd128__ - expect_vec(f32x4_convert_s_i32x4((i32x4){0, -1, 2147483647, -2147483647 - 1}), ((f32x4){0, -1, 2147483648., -2147483648.})); - expect_vec(f32x4_convert_u_i32x4((u32x4){0, -1, 2147483647, -2147483647 - 1}), 
((f32x4){0, 4294967296., 2147483648., 2147483648.})); + expect_vec( + f32x4_convert_s_i32x4((v128_t)i32x4(0, -1, 2147483647, -2147483647 - 1)), + f32x4(0, -1, 2147483648., -2147483648.) + ); + expect_vec( + f32x4_convert_u_i32x4((v128_t)u32x4(0, -1, 2147483647, -2147483647 - 1)), + f32x4(0, 4294967296., 2147483648., 2147483648.) + ); #ifdef __wasm_unimplemented_simd128__ - expect_vec(f64x2_convert_s_i64x2((i64x2){0, -1}), ((f64x2){0, -1})); - expect_vec(f64x2_convert_s_i64x2((i64x2){9223372036854775807, -9223372036854775807 - 1}), ((f64x2){9223372036854775807., -9223372036854775808.})); - expect_vec(f64x2_convert_u_i64x2((u64x2){0, -1}), ((f64x2){0, 18446744073709551616.})); - expect_vec(f64x2_convert_u_i64x2((u64x2)(i64x2){9223372036854775807 , -9223372036854775808.}), ((f64x2){9223372036854775807., 9223372036854775808.})); + expect_vec(f64x2_convert_s_i64x2((v128_t)i64x2(0, -1)), f64x2(0, -1)); + expect_vec( + f64x2_convert_s_i64x2((v128_t)i64x2(9223372036854775807, -9223372036854775807 - 1)), + f64x2(9223372036854775807., -9223372036854775808.) + ); + expect_vec( + f64x2_convert_u_i64x2((v128_t)u64x2(0, -1)), + f64x2(0, 18446744073709551616.) + ); + expect_vec( + f64x2_convert_u_i64x2((v128_t)i64x2(9223372036854775807 , -9223372036854775808.)), + f64x2(9223372036854775807., 9223372036854775808.) + ); #endif // __wasm_unimplemented_simd128__ From 4bf50a0d9841104a8b5d21f25383d100e9052b76 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 19 Jun 2019 16:26:30 -0700 Subject: [PATCH 08/16] Address recent comments - fix copy-paste error where intrinsics weren't used in test fns - fix some inconsistent return types - change some names and comments --- system/include/simd128.h | 25 +++++++++-------- tests/test_wasm_builtin_simd.c | 2 +- tests/test_wasm_intrinsics_simd.c | 45 +++++++++++++++---------------- 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 13f1fc1f1bb16..37c0b6ff27d62 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -5,7 +5,7 @@ WebAssembly SIMD128 Intrinsics #include // User-facing type -typedef int32_t v128_t __attribute__((vector_size(16), __aligned__(16))); +typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16))); // Internal types determined by clang builtin definitions typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); @@ -24,6 +24,7 @@ typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); // v128 wasm_v128_load(void* mem) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { + // UB-free unaligned access copied from xmmintrin.h struct __wasm_v128_load_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); @@ -32,6 +33,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { // wasm_v128_store(void* mem, v128 a) static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128_t __a) { + // UB-free unaligned access copied from xmmintrin.h struct __wasm_v128_store_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); @@ -361,14 +363,15 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_or(v128_t a, v128_t b) { r // v128_t wasm_v128_xor(v128_t a, v128_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_xor(v128_t a, v128_t b) { return a ^ b; } -// v128_t wasm_v128_bitselect(v128_t a, v128_t b, v128_t c) -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t a, v128_t b, v128_t c) { - 
return (v128_t)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b, (__i32x4)c); +// v128_t wasm_v128_bitselect(v128_t a, v128_t b, v128_t mask) +// `a` is selected for each lane for which `mask` is nonzero. +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t a, v128_t b, v128_t mask) { + return (v128_t)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b, (__i32x4)mask); } // v128_t wasm_i8x16_neg(v128_t a) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_neg(v128_t a) { - return (v128_t)(-(__i8x16)a); + return (v128_t)(-(__u8x16)a); } // bool wasm_i8x16_any_true(v128_t a) @@ -428,12 +431,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_saturate(v128_t a, v1 // v128_t wasm_i8x16_mul(v128_t a, v128_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_mul(v128_t a, v128_t b) { - return (v128_t)((__i8x16)a * (__i8x16)b); + return (v128_t)((__u8x16)a * (__u8x16)b); } // v128_t wasm_i16x8_neg(v128_t a) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_neg(v128_t a) { - return (v128_t)(-(__i16x8)a); + return (v128_t)(-(__u16x8)a); } // bool wasm_i16x8_any_true(v128_t a) @@ -493,12 +496,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(v128_t a, v1 // v128_t wasm_i16x8_mul(v128_t a v128_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t a, v128_t b) { - return (v128_t)((__i16x8)a * (__i16x8)b); + return (v128_t)((__u16x8)a * (__u16x8)b); } // v128_t wasm_i32x4_neg(v128_t a) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_neg(v128_t a) { - return (v128_t)(-(__i32x4)a); + return (v128_t)(-(__u32x4)a); } // bool wasm_i32x4_any_true(v128_t a) @@ -538,14 +541,14 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_sub(v128_t a, v128_t b) { // v128_t wasm_i32x4_mul(v128_t a v128_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t a, v128_t b) { - return (v128_t)((__i32x4)a * (__i32x4)b); + return (v128_t)((__u32x4)a * (__u32x4)b); } #ifdef __wasm_unimplemented_simd128__ // v128_t wasm_i64x2_neg(v128_t a) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t a) { - return (v128_t)(-(__i64x2)a); + return (v128_t)(-(__u64x2)a); } // bool wasm_i64x2_any_true(v128_t a) diff --git a/tests/test_wasm_builtin_simd.c b/tests/test_wasm_builtin_simd.c index 9e20af393bf2b..a769d30c9e882 100644 --- a/tests/test_wasm_builtin_simd.c +++ b/tests/test_wasm_builtin_simd.c @@ -576,7 +576,7 @@ static int failures = 0; } \ }) -int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { +int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv) { { i8x16 vec = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; expect_vec(i8x16_load(&vec), diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index 071d57d12bab0..e31f0e225e865 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -168,7 +168,7 @@ v128_t TESTFN i8x16_gt_u(v128_t x, v128_t y) { v128_t TESTFN i8x16_le_s(v128_t x, v128_t y) { return wasm_i8x16_le(x,y); } -v128_t TESTFN u8x16_le_u(v128_t x, v128_t y) { +v128_t TESTFN i8x16_le_u(v128_t x, v128_t y) { return wasm_u8x16_le(x, y); } v128_t TESTFN i8x16_ge_s(v128_t x, v128_t y) { @@ -339,7 +339,7 @@ v128_t TESTFN i16x8_neg(v128_t vec) { bool TESTFN i16x8_any_true(v128_t vec) { return wasm_i16x8_any_true(vec); } -int32_t TESTFN i16x8_all_true(v128_t vec) { +bool TESTFN i16x8_all_true(v128_t vec) { return wasm_i16x8_all_true(vec); } v128_t TESTFN i16x8_shl(v128_t vec, int32_t shift) { @@ -477,22 +477,22 @@ 
v128_t TESTFN f64x2_abs(v128_t vec) { return wasm_f64x2_abs(vec); } v128_t TESTFN f64x2_neg(v128_t vec) { - return -vec; + return wasm_f64x2_neg(vec); } v128_t TESTFN f64x2_sqrt(v128_t vec) { return wasm_f64x2_sqrt(vec); } v128_t TESTFN f64x2_add(v128_t x, v128_t y) { - return x + y; + return wasm_f64x2_add(x, y); } v128_t TESTFN f64x2_sub(v128_t x, v128_t y) { - return x - y; + return wasm_f64x2_sub(x, y); } v128_t TESTFN f64x2_mul(v128_t x, v128_t y) { - return x * y; + return wasm_f64x2_mul(x, y); } v128_t TESTFN f64x2_div(v128_t x, v128_t y) { - return x / y; + return wasm_f64x2_div(x, y); } v128_t TESTFN f64x2_min(v128_t x, v128_t y) { return wasm_f64x2_min(x, y); @@ -644,7 +644,7 @@ static int failures = 0; (__extension__(double __attribute__((__vector_size__(16)))) {c1, c2}) -int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { +int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv) { { v128_t vec = (v128_t)u8x16(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); expect_vec(i8x16_load(&vec), @@ -825,14 +825,13 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { ), u8x16(-1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, 0) ); - // bugs.chromium.org/p/v8/issues/detail?id=8635 - // expect_vec( - // i8x16_le_u( - // (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), - // (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) - // ), - // i8x16(-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1) - // ); + expect_vec( + i8x16_le_u( + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) + ), + i8x16(-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1) + ); expect_vec( i8x16_ge_s( (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), @@ -840,13 +839,13 @@ int EMSCRIPTEN_KEEPALIVE main(int argc, char** argv) { ), u8x16(-1, -1, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, 0, -1) ); - // expect_vec( - // i8x16_ge_u( - // (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), - // (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) - // ), - // i8x16(-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0) - // ); + expect_vec( + i8x16_ge_u( + (v128_t)i8x16(0, 127, 13, 128, 1, 13, 129, 42, 0, 127, 255, 42, 1, 13, 129, 42), + (v128_t)i8x16(0, 255, 13, 42, 129, 127, 0, 128, 0, 255, 13, 42, 129, 127, 0, 128) + ), + i8x16(-1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0) + ); // i16x8 comparisons expect_vec( From 13ddb555f56fa170003eac245f554594808409a5 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 19 Jun 2019 16:52:19 -0700 Subject: [PATCH 09/16] Add v128.const intrinsics for all types Note that they do not enforce that their arguments are constant because there is no way to do that for floats. Builtin functions could be used to enforce this requirement for ints, but without float support that would be a half-baked solution. 
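As an illustration of the intended usage (the two wrapper functions below are
hypothetical, not part of this patch), lanes known at compile time can be built
with the new *_const intrinsics, while runtime values keep using the splat
intrinsics already in the header:

    #include <simd128.h>  // header name at this point in the series

    // All four lanes are literals, so wasm_i32x4_const applies and is
    // intended to lower to a single v128.const instruction.
    v128_t make_iota4(void) {
      return wasm_i32x4_const(1, 2, 3, 4);
    }

    // A runtime value still goes through a splat.
    v128_t make_all(int32_t x) {
      return wasm_i32x4_splat(x);
    }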
--- system/include/simd128.h | 50 ++++++++++++++++++++++++++++--- tests/test_wasm_intrinsics_simd.c | 45 ++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 37c0b6ff27d62..4cde7eb120733 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -40,13 +40,55 @@ static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128_t __ ((struct __wasm_v128_store_struct*)__mem)->__v = __a; } -// v128 wasm_v128_constant(...) -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_const(int8_t c0, int8_t c1, int8_t c2, - int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, - int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { +// v128_t wasm_i8x16_constant(...) +// All arguments to this function must be constant. +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const( + int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, + int8_t c7, int8_t c8, int8_t c9, int8_t c10,int8_t c11, int8_t c12, + int8_t c13, int8_t c14, int8_t c15) { return (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15}; } +// v128_t wasm_i16x8_constant(...) +// All arguments to this function must be constant. +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const( + int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, + int16_t c6, int16_t c7) { + return (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7}; +} + +// v128_t wasm_i32x4_constant(...) +// All arguments to this function must be constant. +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const( + int32_t c0, int32_t c1, int32_t c2, int32_t c3) { + return (v128_t)(__i32x4){c0, c1, c2, c3}; +} + +// v128_t wasm_f32x4_constant(...) +// All arguments to this function must be constant. +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const( + float c0, float c1, float c2, float c3) { + return (v128_t)(__f32x4){c0, c1, c2, c3}; +} + +#ifdef __wasm_unimplemented_simd128__ + +// v128_t wasm_i64x2_constant(...) +// All arguments to this function must be constant. +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const( + int64_t c0, int64_t c1) { + return (v128_t)(__i64x2){c0, c1}; +} + +// v128_t wasm_f64x2_constant(...) +// All arguments to this function must be constant. 
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_const( + double c0, double c1) { + return (v128_t)(__f64x2){c0, c1}; +} + +#endif // __wasm_unimplemented_sidm128__ + // v128_t wasm_i8x16_splat(int8_t a) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) { return (v128_t)(__i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a}; diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index e31f0e225e865..421ef4ee82b2d 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -12,9 +12,30 @@ v128_t TESTFN i8x16_load(void *ptr) { void TESTFN i8x16_store(void *ptr, v128_t vec) { wasm_v128_store(ptr, vec); } +v128_t TESTFN i8x16_const(void) { + return wasm_i8x16_const(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} +v128_t TESTFN i16x8_const(void) { + return wasm_i16x8_const(1, 2, 3, 4, 5, 6, 7, 8); +} v128_t TESTFN i32x4_const(void) { - return wasm_v128_const(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16); + return wasm_i32x4_const(1, 2, 3, 4); +} +v128_t TESTFN f32x4_const(void) { + return wasm_f32x4_const(1., 2., 3., 4.); +} + +#ifdef __wasm_unimplemented_simd128__ + +v128_t TESTFN i64x2_const(void) { + return wasm_i64x2_const(1, 2); +} +v128_t TESTFN f64x2_const(void) { + return wasm_f64x2_const(1., 2.); } + +#endif // __wasm_unimplemented_sidm128__ + v128_t TESTFN i8x16_shuffle_interleave_bytes(v128_t x, v128_t y) { return wasm_v8x16_shuffle(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); } @@ -653,7 +674,18 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_vec(i8x16_load(&vec), i8x16(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)); } - expect_vec(i32x4_const(), u8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); + expect_vec(i8x16_const(), u8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); + expect_vec(i16x8_const(), u16x8(1, 2, 3, 4, 5, 6, 7, 8)); + expect_vec(i32x4_const(), u32x4(1, 2, 3, 4)); + expect_vec(f32x4_const(), f32x4(1., 2., 3., 4.)); + +#ifdef __wasm_unimplemented_simd128__ + + expect_vec(i64x2_const(), u64x2(1, 2)); + expect_vec(f64x2_const(), f64x2(1., 2.)); + +#endif // __wasm_unimplemented_simd128__ + expect_vec( i8x16_shuffle_interleave_bytes( (v128_t)i8x16(1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0), @@ -1088,7 +1120,8 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i8x16_all_true((v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), 0); expect_eq(i8x16_all_true((v128_t)i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0)), 0); expect_eq(i8x16_all_true((v128_t)i8x16(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 0); - expect_eq(i8x16_all_true((v128_t)i8x16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 1); + // https://bugs.chromium.org/p/v8/issues/detail?id=9372 + // expect_eq(i8x16_all_true((v128_t)i8x16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 1); expect_vec( i8x16_shl((v128_t)i8x16(0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64), 1), i8x16(0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128) @@ -1175,7 +1208,7 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i16x8_all_true((v128_t)i16x8(0, 0, 0, 0, 0, 0, 0, 0)), 0); expect_eq(i16x8_all_true((v128_t)i16x8(0, 0, 1, 0, 0, 0, 0, 0)), 0); expect_eq(i16x8_all_true((v128_t)i16x8(1, 1, 1, 1, 1, 0, 1, 1)), 0); - expect_eq(i16x8_all_true((v128_t)i16x8(1, 1, 1, 1, 1, 1, 1, 1)), 1); + // 
expect_eq(i16x8_all_true((v128_t)i16x8(1, 1, 1, 1, 1, 1, 1, 1)), 1); expect_vec( i16x8_shl((v128_t)i16x8(0, 8, 16, 128, 256, 2048, 4096, -32768), 1), i16x8(0, 16, 32, 256, 512, 4096, 8192, 0) @@ -1262,7 +1295,7 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i32x4_all_true((v128_t)i32x4(0, 0, 0, 0)), 0); expect_eq(i32x4_all_true((v128_t)i32x4(0, 0, 1, 0)), 0); expect_eq(i32x4_all_true((v128_t)i32x4(1, 0, 1, 1)), 0); - expect_eq(i32x4_all_true((v128_t)i32x4(1, 1, 1, 1)), 1); + // expect_eq(i32x4_all_true((v128_t)i32x4(1, 1, 1, 1)), 1); expect_vec( i32x4_shl((v128_t)i32x4(1, 0x40000000, 0x80000000, -1), 1), i32x4(2, 0x80000000, 0, -2) @@ -1309,7 +1342,7 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i64x2_any_true((v128_t)i64x2(1, 1)), 1); expect_eq(i64x2_all_true((v128_t)i64x2(0, 0)), 0); expect_eq(i64x2_all_true((v128_t)i64x2(1, 0)), 0); - expect_eq(i64x2_all_true((v128_t)i64x2(1, 1)), 1); + // expect_eq(i64x2_all_true((v128_t)i64x2(1, 1)), 1); expect_vec(i64x2_shl((v128_t)i64x2(1, 0x8000000000000000), 1), i64x2(2, 0)); expect_vec(i64x2_shl((v128_t)i64x2(1, 0x8000000000000000), 64), i64x2(1, 0x8000000000000000)); From 1684a949bb508292772265a47172db80c6f7ea3c Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 20 Jun 2019 11:12:58 -0700 Subject: [PATCH 10/16] Fix some codegen inefficiencies Along with https://reviews.llvm.org/D63615 fixes the issues with i64x2 shifts. --- system/include/simd128.h | 7 ++++--- tests/test_wasm_intrinsics_simd.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 4cde7eb120733..554df4de3806c 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -3,6 +3,7 @@ WebAssembly SIMD128 Intrinsics */ #include +#include // User-facing type typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16))); @@ -605,17 +606,17 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t a) { // v128_t wasm_i64x2_shl(v128_t a, int32_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t a, int32_t b) { - return (v128_t)((__i64x2)a << b); + return (v128_t)((__i64x2)a << (int64_t)b); } // v128_t wasm_i64x2_shr(v128_t a, int32_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t a, int32_t b) { - return (v128_t)((__i64x2)a >> b); + return (v128_t)((__i64x2)a >> (int64_t)b); } // v128_t wasm_u64x2_shr_u(v128_t a, int32_t b) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t a, int32_t b) { - return (v128_t)((__u64x2)a >> b); + return (v128_t)((__u64x2)a >> (int64_t)b); } // v128_t wasm_i64x2_add(v128_t a v128_t b) diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index 421ef4ee82b2d..af201b98b3f20 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -150,8 +150,8 @@ v128_t TESTFN f32x4_replace_lane_last(v128_t vec, float val) { #ifdef __wasm_unimplemented_simd128__ -v128_t TESTFN f64x2_splat(int64_t x) { - return wasm_f64x2_splat((double ) x); +v128_t TESTFN f64x2_splat(double x) { + return wasm_f64x2_splat(x); } double TESTFN f64x2_extract_lane_first(v128_t vec) { return wasm_f64x2_extract_lane(vec, 0); From e45481d155f704b7480b330cde436513d524d9c4 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Sun, 23 Jun 2019 11:20:26 -0700 Subject: [PATCH 11/16] Use __builtin_constant_p in wasm_*_const --- system/include/simd128.h | 102 
++++++++++++++++++++---------- tests/test_wasm_intrinsics_simd.c | 18 ++++-- 2 files changed, 82 insertions(+), 38 deletions(-) diff --git a/system/include/simd128.h b/system/include/simd128.h index 554df4de3806c..38625e097582b 100644 --- a/system/include/simd128.h +++ b/system/include/simd128.h @@ -23,6 +23,15 @@ typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("simd128"), __min_vector_width__(128))) +#ifdef __cplusplus +#include +#define __SAME_TYPE(t1, t2) (std::is_same::value) +#else +#define __SAME_TYPE(t1, t2) (__builtin_types_compatible_p(t1, t2)) +#endif + +#define __REQUIRE_CONSTANT(e, ty, msg) _Static_assert(__builtin_constant_p(e) && __SAME_TYPE(__typeof__(e), ty), msg) + // v128 wasm_v128_load(void* mem) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void* __mem) { // UB-free unaligned access copied from xmmintrin.h @@ -42,51 +51,78 @@ static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128_t __ } // v128_t wasm_i8x16_constant(...) -// All arguments to this function must be constant. -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const( - int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, - int8_t c7, int8_t c8, int8_t c9, int8_t c10,int8_t c11, int8_t c12, - int8_t c13, int8_t c14, int8_t c15) { - return (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15}; -} +#define wasm_i8x16_const(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,c11, c12, c13, c14, c15) \ + __extension__({ \ + __REQUIRE_CONSTANT(c0, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c1, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c2, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c3, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c4, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c5, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c6, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c7, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c8, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c9, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c10, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c11, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c12, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c13, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c14, int8_t, "expected constant int8_t"); \ + __REQUIRE_CONSTANT(c15, int8_t, "expected constant int8_t"); \ + (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15}; \ + }) // v128_t wasm_i16x8_constant(...) -// All arguments to this function must be constant. 
-static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const( - int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, - int16_t c6, int16_t c7) { - return (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7}; -} +#define wasm_i16x8_const(c0, c1, c2, c3, c4, c5, c6, c7) \ + __extension__({ \ + __REQUIRE_CONSTANT(c0, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c1, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c2, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c3, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c4, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c5, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c6, int16_t, "expected constant int16_t"); \ + __REQUIRE_CONSTANT(c7, int16_t, "expected constant int16_t"); \ + (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7}; \ + }) // v128_t wasm_i32x4_constant(...) -// All arguments to this function must be constant. -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const( - int32_t c0, int32_t c1, int32_t c2, int32_t c3) { - return (v128_t)(__i32x4){c0, c1, c2, c3}; -} +#define wasm_i32x4_const(c0, c1, c2, c3) \ + __extension__({ \ + __REQUIRE_CONSTANT(c0, int32_t, "expected constant int32_t"); \ + __REQUIRE_CONSTANT(c1, int32_t, "expected constant int32_t"); \ + __REQUIRE_CONSTANT(c2, int32_t, "expected constant int32_t"); \ + __REQUIRE_CONSTANT(c3, int32_t, "expected constant int32_t"); \ + (v128_t)(__i32x4){c0, c1, c2, c3}; \ + }) // v128_t wasm_f32x4_constant(...) -// All arguments to this function must be constant. -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const( - float c0, float c1, float c2, float c3) { - return (v128_t)(__f32x4){c0, c1, c2, c3}; -} +#define wasm_f32x4_const(c0, c1, c2, c3) \ + __extension__({ \ + __REQUIRE_CONSTANT(c0, float, "expected constant float"); \ + __REQUIRE_CONSTANT(c1, float, "expected constant float"); \ + __REQUIRE_CONSTANT(c2, float, "expected constant float"); \ + __REQUIRE_CONSTANT(c3, float, "expected constant float"); \ + (v128_t)(__f32x4){c0, c1, c2, c3}; \ + }) #ifdef __wasm_unimplemented_simd128__ // v128_t wasm_i64x2_constant(...) -// All arguments to this function must be constant. -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const( - int64_t c0, int64_t c1) { - return (v128_t)(__i64x2){c0, c1}; -} +#define wasm_i64x2_const(c0, c1) \ + __extension__({ \ + __REQUIRE_CONSTANT(c0, int64_t, "expected constant int64_t"); \ + __REQUIRE_CONSTANT(c1, int64_t, "expected constant int64_t"); \ + (v128_t)(__i64x2){c0, c1}; \ + }) // v128_t wasm_f64x2_constant(...) -// All arguments to this function must be constant. 
-static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_const( - double c0, double c1) { - return (v128_t)(__f64x2){c0, c1}; -} +#define wasm_f64x2_const(c0, c1) \ + __extension__({ \ + __REQUIRE_CONSTANT(c0, double, "expected constant double"); \ + __REQUIRE_CONSTANT(c1, double, "expected constant double"); \ + (v128_t)(__f64x2){c0, c1}; \ + }) #endif // __wasm_unimplemented_sidm128__ diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index af201b98b3f20..d82cff2969878 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -13,22 +13,30 @@ void TESTFN i8x16_store(void *ptr, v128_t vec) { wasm_v128_store(ptr, vec); } v128_t TESTFN i8x16_const(void) { - return wasm_i8x16_const(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + return wasm_i8x16_const( + (int8_t)1, (int8_t)2, (int8_t)3, (int8_t)4, + (int8_t)5, (int8_t)6, (int8_t)7, (int8_t)8, + (int8_t)9, (int8_t)10, (int8_t)11, (int8_t)12, + (int8_t)13, (int8_t)14, (int8_t)15, (int8_t)16 + ); } v128_t TESTFN i16x8_const(void) { - return wasm_i16x8_const(1, 2, 3, 4, 5, 6, 7, 8); + return wasm_i16x8_const( + (int16_t)1, (int16_t)2, (int16_t)3, (int16_t)4, + (int16_t)5, (int16_t)6, (int16_t)7, (int16_t)8 + ); } v128_t TESTFN i32x4_const(void) { - return wasm_i32x4_const(1, 2, 3, 4); + return wasm_i32x4_const((int32_t)1, (int32_t)2, (int32_t)3, (int32_t)4); } v128_t TESTFN f32x4_const(void) { - return wasm_f32x4_const(1., 2., 3., 4.); + return wasm_f32x4_const(1.f, 2.f, 3.f, 4.f); } #ifdef __wasm_unimplemented_simd128__ v128_t TESTFN i64x2_const(void) { - return wasm_i64x2_const(1, 2); + return wasm_i64x2_const((int64_t)1, (int64_t)2); } v128_t TESTFN f64x2_const(void) { return wasm_f64x2_const(1., 2.); From 96b5445a1e3c1bfdfe4064cba995d5efd7e0770f Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 24 Jun 2019 12:12:05 -0700 Subject: [PATCH 12/16] Add documentation --- site/source/docs/porting/simd.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/site/source/docs/porting/simd.rst b/site/source/docs/porting/simd.rst index 9a6f5035f113b..a378956404f84 100644 --- a/site/source/docs/porting/simd.rst +++ b/site/source/docs/porting/simd.rst @@ -6,7 +6,13 @@ Porting SIMD code targeting WebAssembly Emscripten supports the `WebAssembly SIMD proposal `_ when using the WebAssembly LLVM backend. To enable SIMD, pass the -msimd128 flag at compile time. This will also turn on LLVM's autovectorization passes, so no source modifications are necessary to benefit from SIMD. -At the source level, the GCC/Clang `SIMD Vector Extensions `_ can be used and will be lowered to WebAssembly SIMD instructions where possible. A portable intrinsics header for WebAssembly SIMD is also being actively developed. +At the source level, the GCC/Clang `SIMD Vector Extensions `_ can be used and will be lowered to WebAssembly SIMD instructions where possible. In addition, there is a portable intrinsics header file that can be used. + + .. code-block:: cpp + + #include + +Separate documentation for the intrinsics header is a work in progress, but its usage is straightforward and its source can be found at `simd128.h `. WebAssembly SIMD is not supported when using the Fastcomp backend. 
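To make the documented workflow concrete, here is a minimal sketch of a kernel
written against the intrinsics header (the file name and function are
illustrative assumptions, not part of the patch, and the element count is
assumed to be a multiple of four):

    // vadd.c -- compile with, e.g.: emcc -msimd128 vadd.c
    #include <stdint.h>
    #include <simd128.h>  // renamed to wasm_simd128.h later in this series

    // Adds packed 32-bit integers four lanes at a time using the
    // unaligned-safe load/store intrinsics.
    void add_i32x4(int32_t* dst, const int32_t* a, const int32_t* b, int n) {
      for (int i = 0; i < n; i += 4) {
        v128_t va = wasm_v128_load(&a[i]);
        v128_t vb = wasm_v128_load(&b[i]);
        wasm_v128_store(&dst[i], wasm_i32x4_add(va, vb));
      }
    }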
From 659808c49eee8af6d646e3489edb9d9a41be0034 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 24 Jun 2019 12:33:00 -0700 Subject: [PATCH 13/16] Add stability disclaimer --- site/source/docs/porting/simd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/source/docs/porting/simd.rst b/site/source/docs/porting/simd.rst index a378956404f84..2be15c5429841 100644 --- a/site/source/docs/porting/simd.rst +++ b/site/source/docs/porting/simd.rst @@ -12,7 +12,7 @@ At the source level, the GCC/Clang `SIMD Vector Extensions -Separate documentation for the intrinsics header is a work in progress, but its usage is straightforward and its source can be found at `simd128.h `. +Separate documentation for the intrinsics header is a work in progress, but its usage is straightforward and its source can be found at `simd128.h `. These intrinsics are under active development in parallel with the SIMD proposal and should not be considered any more stable than the proposal itself. WebAssembly SIMD is not supported when using the Fastcomp backend. From 98972ecb0023ef3347651fb8ae4e4948c46d5877 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 25 Jun 2019 12:23:11 -0700 Subject: [PATCH 14/16] Rename to wasm_simd128.h --- site/source/docs/porting/simd.rst | 4 ++-- system/include/{simd128.h => wasm_simd128.h} | 0 tests/test_wasm_builtin_simd.c | 7 ++++--- tests/test_wasm_intrinsics_simd.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) rename system/include/{simd128.h => wasm_simd128.h} (100%) diff --git a/site/source/docs/porting/simd.rst b/site/source/docs/porting/simd.rst index 2be15c5429841..d2315f43f6ebc 100644 --- a/site/source/docs/porting/simd.rst +++ b/site/source/docs/porting/simd.rst @@ -10,9 +10,9 @@ At the source level, the GCC/Clang `SIMD Vector Extensions + #include -Separate documentation for the intrinsics header is a work in progress, but its usage is straightforward and its source can be found at `simd128.h `. These intrinsics are under active development in parallel with the SIMD proposal and should not be considered any more stable than the proposal itself. +Separate documentation for the intrinsics header is a work in progress, but its usage is straightforward and its source can be found at `wasm_simd128.h `. These intrinsics are under active development in parallel with the SIMD proposal and should not be considered any more stable than the proposal itself. WebAssembly SIMD is not supported when using the Fastcomp backend. 
diff --git a/system/include/simd128.h b/system/include/wasm_simd128.h similarity index 100% rename from system/include/simd128.h rename to system/include/wasm_simd128.h diff --git a/tests/test_wasm_builtin_simd.c b/tests/test_wasm_builtin_simd.c index a769d30c9e882..b04279cc3c11a 100644 --- a/tests/test_wasm_builtin_simd.c +++ b/tests/test_wasm_builtin_simd.c @@ -951,7 +951,8 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 0); expect_eq(i8x16_all_true((i8x16){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}), 0); expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 0); - expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); + // https://bugs.chromium.org/p/v8/issues/detail?id=9372 + /* expect_eq(i8x16_all_true((i8x16){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), 1); */ expect_vec( i8x16_shl((i8x16){0, 1, 2, 4, 8, 16, 32, 64, -128, 3, 6, 12, 24, 48, 96, -64}, 1), ((i8x16){0, 2, 4, 8, 16, 32, 64, -128, 0, 6, 12, 24, 48, 96, -64, -128}) @@ -1038,7 +1039,7 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i16x8_all_true((i16x8){0, 0, 0, 0, 0, 0, 0, 0}), 0); expect_eq(i16x8_all_true((i16x8){0, 0, 1, 0, 0, 0, 0, 0}), 0); expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 0, 1, 1}), 0); - expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); + /* expect_eq(i16x8_all_true((i16x8){1, 1, 1, 1, 1, 1, 1, 1}), 1); */ expect_vec( i16x8_shl((i16x8){0, 8, 16, 128, 256, 2048, 4096, -32768}, 1), ((i16x8){0, 16, 32, 256, 512, 4096, 8192, 0}) @@ -1122,7 +1123,7 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_eq(i32x4_all_true((i32x4){0, 0, 0, 0}), 0); expect_eq(i32x4_all_true((i32x4){0, 0, 1, 0}), 0); expect_eq(i32x4_all_true((i32x4){1, 0, 1, 1}), 0); - expect_eq(i32x4_all_true((i32x4){1, 1, 1, 1}), 1); + /* expect_eq(i32x4_all_true((i32x4){1, 1, 1, 1}), 1); */ expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){2, 0x80000000, 0, -2})); expect_vec(i32x4_shl((i32x4){1, 0x40000000, 0x80000000, -1}, 32), ((i32x4){1, 0x40000000, 0x80000000, -1})); expect_vec(i32x4_shr_s((i32x4){1, 0x40000000, 0x80000000, -1}, 1), ((i32x4){0, 0x20000000, 0xc0000000, -1})); diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index d82cff2969878..e471a188212a1 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #define TESTFN EMSCRIPTEN_KEEPALIVE __attribute__((noinline)) From e4cc6b12e9dc1d8d84205a6f984244101aaff29c Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 25 Jun 2019 15:38:59 -0700 Subject: [PATCH 15/16] Add wasm_*_make convenience functions --- system/include/wasm_simd128.h | 36 ++++++++++++++++++++++++- tests/test_wasm_intrinsics_simd.c | 44 +++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/system/include/wasm_simd128.h b/system/include/wasm_simd128.h index 38625e097582b..89c1f6a6f80e2 100644 --- a/system/include/wasm_simd128.h +++ b/system/include/wasm_simd128.h @@ -50,8 +50,42 @@ static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void* __mem, v128_t __ ((struct __wasm_v128_store_struct*)__mem)->__v = __a; } +// wasm_i8x16_make(...) 
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + return (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15}; +} + +// wasm_i16x8_make(...) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) { + return (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7}; +} + +// wasm_i32x4_make(...) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) { + return (v128_t)(__i32x4){c0, c1, c2, c3}; +} + +// wasm_f32x4_make(...) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float c0, float c1, float c2, float c3) { + return (v128_t)(__f32x4){c0, c1, c2, c3}; +} + +#ifdef __wasm_unimplemented_simd128__ + +// wasm_i64x2_make(...) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t c0, int64_t c1) { + return (v128_t)(__i64x2){c0, c1}; +} + +// wasm_f64x2_make(...) +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_make(double c0, double c1) { + return (v128_t)(__f64x2){c0, c1}; +} + +#endif // __wasm_unimplemented_simd128__ + // v128_t wasm_i8x16_constant(...) -#define wasm_i8x16_const(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,c11, c12, c13, c14, c15) \ +#define wasm_i8x16_const(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15) \ __extension__({ \ __REQUIRE_CONSTANT(c0, int8_t, "expected constant int8_t"); \ __REQUIRE_CONSTANT(c1, int8_t, "expected constant int8_t"); \ diff --git a/tests/test_wasm_intrinsics_simd.c b/tests/test_wasm_intrinsics_simd.c index e471a188212a1..ad74b333e5ab3 100644 --- a/tests/test_wasm_intrinsics_simd.c +++ b/tests/test_wasm_intrinsics_simd.c @@ -44,6 +44,38 @@ v128_t TESTFN f64x2_const(void) { #endif // __wasm_unimplemented_sidm128__ +v128_t TESTFN i8x16_make(int8_t first) { + return wasm_i8x16_make( + first, (int8_t)2, (int8_t)3, (int8_t)4, + (int8_t)5, (int8_t)6, (int8_t)7, (int8_t)8, + (int8_t)9, (int8_t)10, (int8_t)11, (int8_t)12, + (int8_t)13, (int8_t)14, (int8_t)15, (int8_t)16 + ); +} +v128_t TESTFN i16x8_make(int16_t first) { + return wasm_i16x8_make( + first, (int16_t)2, (int16_t)3, (int16_t)4, + (int16_t)5, (int16_t)6, (int16_t)7, (int16_t)8 + ); +} +v128_t TESTFN i32x4_make(int32_t first) { + return wasm_i32x4_make(first, (int32_t)2, (int32_t)3, (int32_t)4); +} +v128_t TESTFN f32x4_make(float first) { + return wasm_f32x4_make(first, 2.f, 3.f, 4.f); +} + +#ifdef __wasm_unimplemented_simd128__ + +v128_t TESTFN i64x2_make(int64_t first) { + return wasm_i64x2_make(first, (int64_t)2); +} +v128_t TESTFN f64x2_make(double first) { + return wasm_f64x2_make(first, 2.); +} + +#endif // __wasm_unimplemented_sidm128__ + v128_t TESTFN i8x16_shuffle_interleave_bytes(v128_t x, v128_t y) { return wasm_v8x16_shuffle(x, y, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); } @@ -692,6 +724,18 @@ int EMSCRIPTEN_KEEPALIVE __attribute__((__optnone__)) main(int argc, char** argv expect_vec(i64x2_const(), u64x2(1, 2)); expect_vec(f64x2_const(), f64x2(1., 2.)); +#endif // __wasm_unimplemented_simd128__ + + expect_vec(i8x16_make(1), u8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); + expect_vec(i16x8_make(1), u16x8(1, 2, 3, 4, 5, 6, 7, 8)); + expect_vec(i32x4_make(1), u32x4(1, 2, 3, 4)); + expect_vec(f32x4_make(1), f32x4(1., 2., 3., 
4.)); + +#ifdef __wasm_unimplemented_simd128__ + + expect_vec(i64x2_make(1), u64x2(1, 2)); + expect_vec(f64x2_make(1), f64x2(1., 2.)); + #endif // __wasm_unimplemented_simd128__ expect_vec( From bc8494d5ad4c7757d8ab17961ad16ccff1c3628d Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 25 Jun 2019 16:47:23 -0700 Subject: [PATCH 16/16] Fix whitespace --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index b48c1230e6c95..b1c08156b4d30 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -6619,7 +6619,7 @@ class std_string { std::cout << txtTestString.data() << std::endl; return 0; } - ''', '''std_string(const char* s) + ''', '''std_string(const char* s) someweirdtext 212121 212121
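For completeness, a brief sketch of how the const and make forms added in this
series differ in use (the two helper functions are illustrative, not from the
patches): wasm_i32x4_const is a macro that requires compile-time constants of
the exact lane type, so the explicit casts follow the convention used in the
updated tests, while wasm_i32x4_make is an ordinary inline function that
accepts runtime values.

    #include <wasm_simd128.h>  // header name after the rename in this series

    // Constant lanes: every argument must satisfy __builtin_constant_p and
    // have the exact lane type.
    v128_t lanes_0123(void) {
      return wasm_i32x4_const((int32_t)0, (int32_t)1, (int32_t)2, (int32_t)3);
    }

    // Runtime lanes: any int32_t expressions are accepted.
    v128_t lanes_from(int32_t x) {
      return wasm_i32x4_make(x, x + 1, x + 2, x + 3);
    }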