From 1d233f48ae582c8ade03131715363fa54c4f7296 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 6 Mar 2018 18:05:31 +0100 Subject: [PATCH] adds AArch64's {s,u,f}{min,max}{v,p} and ARM's {vmov}{n,l} * adds {s,u,f}{min,max}{v,p} AArch64 intrinsics * adds {vmov}{n,l} ARM intrinsics Closes #314. --- ci/run.sh | 4 +- coresimd/aarch64/neon.rs | 536 ++++++++++++++++++++++++++++++++++++++- coresimd/arm/neon.rs | 200 ++++++++++++++- 3 files changed, 736 insertions(+), 4 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 1fa475f27f..37fa4a79e5 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -5,8 +5,8 @@ set -ex # Tests are all super fast anyway, and they fault often enough on travis that # having only one thread increases debuggability to be worth it. export RUST_TEST_THREADS=1 -export RUST_BACKTRACE=1 -export RUST_TEST_NOCAPTURE=1 +#export RUST_BACKTRACE=1 +#export RUST_TEST_NOCAPTURE=1 # FIXME(rust-lang-nursery/stdsimd#120) run-time feature detection for ARM Neon case ${TARGET} in diff --git a/coresimd/aarch64/neon.rs b/coresimd/aarch64/neon.rs index 30adbf309b..c411bc9115 100644 --- a/coresimd/aarch64/neon.rs +++ b/coresimd/aarch64/neon.rs @@ -39,9 +39,319 @@ pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 { a + b } +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"] + fn vmaxv_s8_(a: i8x8) -> i8; + #[link_name = "llvm.aarch64.neon.smaxv.i8.v16i8"] + fn vmaxvq_s8_(a: i8x16) -> i8; + #[link_name = "llvm.aarch64.neon.smaxv.i16.v4i16"] + fn vmaxv_s16_(a: i16x4) -> i16; + #[link_name = "llvm.aarch64.neon.smaxv.i16.v8i16"] + fn vmaxvq_s16_(a: i16x8) -> i16; + #[link_name = "llvm.aarch64.neon.smaxv.i32.v2i32"] + fn vmaxv_s32_(a: i32x2) -> i32; + #[link_name = "llvm.aarch64.neon.smaxv.i32.v4i32"] + fn vmaxvq_s32_(a: i32x4) -> i32; + + #[link_name = "llvm.aarch64.neon.umaxv.i8.v8i8"] + fn vmaxv_u8_(a: u8x8) -> u8; + #[link_name = "llvm.aarch64.neon.umaxv.i8.v16i8"] + fn vmaxvq_u8_(a: u8x16) -> u8; + #[link_name = "llvm.aarch64.neon.umaxv.i16.v4i16"] + fn vmaxv_u16_(a: u16x4) -> u16; + #[link_name = "llvm.aarch64.neon.umaxv.i16.v8i16"] + fn vmaxvq_u16_(a: u16x8) -> u16; + #[link_name = "llvm.aarch64.neon.umaxv.i32.v2i32"] + fn vmaxv_u32_(a: u32x2) -> u32; + #[link_name = "llvm.aarch64.neon.umaxv.i32.v4i32"] + fn vmaxvq_u32_(a: u32x4) -> u32; + + #[link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"] + fn vmaxv_f32_(a: f32x2) -> f32; + #[link_name = "llvm.aarch64.neon.fmaxv.f32.v4f32"] + fn vmaxvq_f32_(a: f32x4) -> f32; + #[link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"] + fn vmaxvq_f64_(a: f64x2) -> f64; + + #[link_name = "llvm.aarch64.neon.sminv.i8.v8i8"] + fn vminv_s8_(a: i8x8) -> i8; + #[link_name = "llvm.aarch64.neon.sminv.i8.v16i8"] + fn vminvq_s8_(a: i8x16) -> i8; + #[link_name = "llvm.aarch64.neon.sminv.i16.v4i16"] + fn vminv_s16_(a: i16x4) -> i16; + #[link_name = "llvm.aarch64.neon.sminv.i16.v8i16"] + fn vminvq_s16_(a: i16x8) -> i16; + #[link_name = "llvm.aarch64.neon.sminv.i32.v2i32"] + fn vminv_s32_(a: i32x2) -> i32; + #[link_name = "llvm.aarch64.neon.sminv.i32.v4i32"] + fn vminvq_s32_(a: i32x4) -> i32; + + #[link_name = "llvm.aarch64.neon.uminv.i8.v8i8"] + fn vminv_u8_(a: u8x8) -> u8; + #[link_name = "llvm.aarch64.neon.uminv.i8.v16i8"] + fn vminvq_u8_(a: u8x16) -> u8; + #[link_name = "llvm.aarch64.neon.uminv.i16.v4i16"] + fn vminv_u16_(a: u16x4) -> u16; + #[link_name = "llvm.aarch64.neon.uminv.i16.v8i16"] + fn vminvq_u16_(a: u16x8) -> u16; + #[link_name = "llvm.aarch64.neon.uminv.i32.v2i32"] + fn vminv_u32_(a: u32x2) -> u32; + #[link_name = 
"llvm.aarch64.neon.uminv.i32.v4i32"] + fn vminvq_u32_(a: u32x4) -> u32; + + #[link_name = "llvm.aarch64.neon.fminv.f32.v2f32"] + fn vminv_f32_(a: f32x2) -> f32; + #[link_name = "llvm.aarch64.neon.fminv.f32.v4f32"] + fn vminvq_f32_(a: f32x4) -> f32; + #[link_name = "llvm.aarch64.neon.fminv.f64.v2f64"] + fn vminvq_f64_(a: f64x2) -> f64; + +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smaxv))] +pub unsafe fn vmaxv_s8(a: i8x8) -> i8 { + vmaxv_s8_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smaxv))] +pub unsafe fn vmaxvq_s8(a: i8x16) -> i8 { + vmaxvq_s8_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smaxv))] +pub unsafe fn vmaxv_s16(a: i16x4) -> i16 { + vmaxv_s16_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smaxv))] +pub unsafe fn vmaxvq_s16(a: i16x8) -> i16 { + vmaxvq_s16_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smaxp))] +pub unsafe fn vmaxv_s32(a: i32x2) -> i32 { + vmaxv_s32_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smaxv))] +pub unsafe fn vmaxvq_s32(a: i32x4) -> i32 { + vmaxvq_s32_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umaxv))] +pub unsafe fn vmaxv_u8(a: u8x8) -> u8 { + vmaxv_u8_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umaxv))] +pub unsafe fn vmaxvq_u8(a: u8x16) -> u8 { + vmaxvq_u8_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umaxv))] +pub unsafe fn vmaxv_u16(a: u16x4) -> u16 { + vmaxv_u16_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umaxv))] +pub unsafe fn vmaxvq_u16(a: u16x8) -> u16 { + vmaxvq_u16_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umaxp))] +pub unsafe fn vmaxv_u32(a: u32x2) -> u32 { + vmaxv_u32_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umaxv))] +pub unsafe fn vmaxvq_u32(a: u32x4) -> u32 { + vmaxvq_u32_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vmaxv_f32(a: f32x2) -> f32 { + vmaxv_f32_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxv))] +pub unsafe fn vmaxvq_f32(a: f32x4) -> f32 { + vmaxvq_f32_(a) +} + +/// Horizontal vector max. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vmaxvq_f64(a: f64x2) -> f64 { + vmaxvq_f64_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sminv))] +pub unsafe fn vminv_s8(a: i8x8) -> i8 { + vminv_s8_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sminv))] +pub unsafe fn vminvq_s8(a: i8x16) -> i8 { + vminvq_s8_(a) +} + +/// Horizontal vector min. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sminv))] +pub unsafe fn vminv_s16(a: i16x4) -> i16 { + vminv_s16_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sminv))] +pub unsafe fn vminvq_s16(a: i16x8) -> i16 { + vminvq_s16_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sminp))] +pub unsafe fn vminv_s32(a: i32x2) -> i32 { + vminv_s32_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sminv))] +pub unsafe fn vminvq_s32(a: i32x4) -> i32 { + vminvq_s32_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uminv))] +pub unsafe fn vminv_u8(a: u8x8) -> u8 { + vminv_u8_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uminv))] +pub unsafe fn vminvq_u8(a: u8x16) -> u8 { + vminvq_u8_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uminv))] +pub unsafe fn vminv_u16(a: u16x4) -> u16 { + vminv_u16_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uminv))] +pub unsafe fn vminvq_u16(a: u16x8) -> u16 { + vminvq_u16_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uminp))] +pub unsafe fn vminv_u32(a: u32x2) -> u32 { + vminv_u32_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uminv))] +pub unsafe fn vminvq_u32(a: u32x4) -> u32 { + vminvq_u32_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vminv_f32(a: f32x2) -> f32 { + vminv_f32_(a) +} + +/// Horizontal vector min. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminv))] +pub unsafe fn vminvq_f32(a: f32x4) -> f32 { + vminvq_f32_(a) +} + +/// Horizontal vector min. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vminvq_f64(a: f64x2) -> f64 { + vminvq_f64_(a) +} + #[cfg(test)] mod tests { - use simd::f64x2; + use simd::*; use coresimd::aarch64::neon; use stdsimd_test::simd_test; @@ -80,4 +390,228 @@ mod tests { let r = neon::vaddd_u64(a, b); assert_eq!(r, e); } + + #[simd_test = "neon"] + unsafe fn vmaxv_s8() { + let r = neon::vmaxv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)); + assert_eq!(r, 7_i8); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_s8() { + let r = neon::vmaxvq_s8(i8x16::new( + 1, 2, 3, 4, + -16, 6, 7, 5, + 8, 1, 1, 1, + 1, 1, 1, 1 + )); + assert_eq!(r, 8_i8); + } + + #[simd_test = "neon"] + unsafe fn vmaxv_s16() { + let r = neon::vmaxv_s16(i16x4::new(1, 2, -4, 3)); + assert_eq!(r, 3_i16); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_s16() { + let r = neon::vmaxvq_s16(i16x8::new( + 1, 2, 7, 4, + -16, 6, 7, 5 + )); + assert_eq!(r, 7_i16); + } + + #[simd_test = "neon"] + unsafe fn vmaxv_s32() { + let r = neon::vmaxv_s32(i32x2::new(1, -4)); + assert_eq!(r, 1_i32); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_s32() { + let r = neon::vmaxvq_s32(i32x4::new( + 1, 2, -32, 4 + )); + assert_eq!(r, 4_i32); + } + + #[simd_test = "neon"] + unsafe fn vmaxv_u8() { + let r = neon::vmaxv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)); + assert_eq!(r, 8_u8); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_u8() { + let r = neon::vmaxvq_u8(u8x16::new( + 1, 2, 3, 4, + 16, 6, 7, 5, + 8, 1, 1, 1, + 1, 1, 1, 1 + )); + assert_eq!(r, 16_u8); + } + + #[simd_test = "neon"] + unsafe fn vmaxv_u16() { + let r = neon::vmaxv_u16(u16x4::new(1, 2, 4, 3)); + assert_eq!(r, 4_u16); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_u16() { + let r = neon::vmaxvq_u16(u16x8::new( + 1, 2, 7, 4, + 16, 6, 7, 5 + )); + assert_eq!(r, 16_u16); + } + + #[simd_test = "neon"] + unsafe fn vmaxv_u32() { + let r = neon::vmaxv_u32(u32x2::new(1, 4)); + assert_eq!(r, 4_u32); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_u32() { + let r = neon::vmaxvq_u32(u32x4::new( + 1, 2, 32, 4 + )); + assert_eq!(r, 32_u32); + } + + #[simd_test = "neon"] + unsafe fn vmaxv_f32() { + let r = neon::vmaxv_f32(f32x2::new(1., 4.)); + assert_eq!(r, 4_f32); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_f32() { + let r = neon::vmaxvq_f32(f32x4::new( + 1., 2., 32., 4. 
+ )); + assert_eq!(r, 32_f32); + } + + #[simd_test = "neon"] + unsafe fn vmaxvq_f64() { + let r = neon::vmaxvq_f64(f64x2::new(1., 4.)); + assert_eq!(r, 4_f64); + } + + #[simd_test = "neon"] + unsafe fn vminv_s8() { + let r = neon::vminv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)); + assert_eq!(r, -8_i8); + } + + #[simd_test = "neon"] + unsafe fn vminvq_s8() { + let r = neon::vminvq_s8(i8x16::new( + 1, 2, 3, 4, + -16, 6, 7, 5, + 8, 1, 1, 1, + 1, 1, 1, 1 + )); + assert_eq!(r, -16_i8); + } + + #[simd_test = "neon"] + unsafe fn vminv_s16() { + let r = neon::vminv_s16(i16x4::new(1, 2, -4, 3)); + assert_eq!(r, -4_i16); + } + + #[simd_test = "neon"] + unsafe fn vminvq_s16() { + let r = neon::vminvq_s16(i16x8::new( + 1, 2, 7, 4, + -16, 6, 7, 5 + )); + assert_eq!(r, -16_i16); + } + + #[simd_test = "neon"] + unsafe fn vminv_s32() { + let r = neon::vminv_s32(i32x2::new(1, -4)); + assert_eq!(r, -4_i32); + } + + #[simd_test = "neon"] + unsafe fn vminvq_s32() { + let r = neon::vminvq_s32(i32x4::new( + 1, 2, -32, 4 + )); + assert_eq!(r, -32_i32); + } + + #[simd_test = "neon"] + unsafe fn vminv_u8() { + let r = neon::vminv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)); + assert_eq!(r, 1_u8); + } + + #[simd_test = "neon"] + unsafe fn vminvq_u8() { + let r = neon::vminvq_u8(u8x16::new( + 1, 2, 3, 4, + 16, 6, 7, 5, + 8, 1, 1, 1, + 1, 1, 1, 1 + )); + assert_eq!(r, 1_u8); + } + + #[simd_test = "neon"] + unsafe fn vminv_u16() { + let r = neon::vminv_u16(u16x4::new(1, 2, 4, 3)); + assert_eq!(r, 1_u16); + } + + #[simd_test = "neon"] + unsafe fn vminvq_u16() { + let r = neon::vminvq_u16(u16x8::new( + 1, 2, 7, 4, + 16, 6, 7, 5 + )); + assert_eq!(r, 1_u16); + } + + #[simd_test = "neon"] + unsafe fn vminv_u32() { + let r = neon::vminv_u32(u32x2::new(1, 4)); + assert_eq!(r, 1_u32); + } + + #[simd_test = "neon"] + unsafe fn vminvq_u32() { + let r = neon::vminvq_u32(u32x4::new( + 1, 2, 32, 4 + )); + assert_eq!(r, 1_u32); + } + + #[simd_test = "neon"] + unsafe fn vminv_f32() { + let r = neon::vminv_f32(f32x2::new(1., 4.)); + assert_eq!(r, 1_f32); + } + + #[simd_test = "neon"] + unsafe fn vminvq_f32() { + let r = neon::vminvq_f32(f32x4::new( + 1., 2., 32., 4. + )); + assert_eq!(r, 1_f32); + } + + #[simd_test = "neon"] + unsafe fn vminvq_f64() { + let r = neon::vminvq_f64(f64x2::new(1., 4.)); + assert_eq!(r, 1_f64); + } } diff --git a/coresimd/arm/neon.rs b/coresimd/arm/neon.rs index 70f39fc391..e19fa7d4a3 100644 --- a/coresimd/arm/neon.rs +++ b/coresimd/arm/neon.rs @@ -5,7 +5,7 @@ use stdsimd_test::assert_instr; use coresimd::simd_llvm::simd_add; use coresimd::simd::*; -use convert::From; +use convert::{From, Into}; /// Vector add. #[inline] @@ -211,6 +211,108 @@ pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 { frsqrte_v2f32(a) } +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(xtn))] +pub unsafe fn vmovn_s16(a: i16x8) -> i8x8 { + a.into() +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(xtn))] +pub unsafe fn vmovn_s32(a: i32x4) -> i16x4 { + a.into() +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(xtn))] +pub unsafe fn vmovn_s64(a: i64x2) -> i32x2 { + a.into() +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(xtn))] +pub unsafe fn vmovn_u16(a: u16x8) -> u8x8 { + a.into() +} + +/// Vector narrow integer. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(xtn))] +pub unsafe fn vmovn_u32(a: u32x4) -> u16x4 { + a.into() +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(xtn))] +pub unsafe fn vmovn_u64(a: u64x2) -> u32x2 { + a.into() +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s8(a: i8x8) -> i16x8 { + a.into() +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s16(a: i16x4) -> i32x4 { + a.into() +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s32(a: i32x2) -> i64x2 { + a.into() +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ushll))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u8(a: u8x8) -> u16x8 { + a.into() +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ushll))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u16(a: u16x4) -> u32x4 { + a.into() +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ushll))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u32(a: u32x2) -> u64x2 { + a.into() +} + #[cfg(test)] mod tests { use stdsimd_test::simd_test; @@ -410,4 +512,100 @@ mod tests { let r = neon::vrsqrte_f32(a); assert_eq!(r, e); } + + #[simd_test = "neon"] + unsafe fn vmovn_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = neon::vmovn_s16(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovn_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i16x4::new(1, 2, 3, 4); + let r = neon::vmovn_s32(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovn_s64() { + let a = i64x2::new(1, 2); + let e = i32x2::new(1, 2); + let r = neon::vmovn_s64(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovn_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = neon::vmovn_u16(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovn_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u16x4::new(1, 2, 3, 4); + let r = neon::vmovn_u32(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovn_u64() { + let a = u64x2::new(1, 2); + let e = u32x2::new(1, 2); + let r = neon::vmovn_u64(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovl_s8() { + let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = neon::vmovl_s8(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovl_s16() { + let e = i32x4::new(1, 2, 3, 4); + let a = i16x4::new(1, 2, 3, 4); + let r = neon::vmovl_s16(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovl_s32() { + let e = 
i64x2::new(1, 2); + let a = i32x2::new(1, 2); + let r = neon::vmovl_s32(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovl_u8() { + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = neon::vmovl_u8(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovl_u16() { + let e = u32x4::new(1, 2, 3, 4); + let a = u16x4::new(1, 2, 3, 4); + let r = neon::vmovl_u16(a); + assert_eq!(r, e); + } + + #[simd_test = "neon"] + unsafe fn vmovl_u32() { + let e = u64x2::new(1, 2); + let a = u32x2::new(1, 2); + let r = neon::vmovl_u32(a); + assert_eq!(r, e); + } }
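
Usage note (review commentary, not part of the patch): a minimal sketch of how the new intrinsics compose. The `coresimd` module paths and the helper names `max_byte` and `widen_narrow_roundtrip` are illustrative assumptions, not part of this diff; the intrinsic signatures are the ones added above.

    use coresimd::simd::{u8x16, u8x8};
    use coresimd::aarch64::neon;    // vmaxvq_u8: AArch64-only horizontal reduction
    use coresimd::arm::neon as arm; // vmovl_u8 / vmovn_u16: shared ARM/AArch64

    // Horizontal reduction: collapse all 16 lanes into the largest byte.
    // Per the assert_instr annotation above, this should lower to a single UMAXV.
    unsafe fn max_byte(v: u8x16) -> u8 {
        neon::vmaxvq_u8(v)
    }

    // Widen u8 lanes to u16 (UXTL on AArch64), then truncate back (XTN).
    // The round trip is lossless as long as every lane still fits in 8 bits.
    unsafe fn widen_narrow_roundtrip(v: u8x8) -> u8x8 {
        arm::vmovn_u16(arm::vmovl_u8(v))
    }

Both helpers are `unsafe fn`s because the intrinsics they wrap are `#[target_feature(enable = "neon")]`: callers must guarantee NEON is available, e.g. by compiling with `-C target-feature=+neon`, or via run-time feature detection once the rust-lang-nursery/stdsimd#120 FIXME noted in ci/run.sh is resolved for ARM.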