diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index fc60ea2c69..c276e0749c 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -1162,6 +1162,868 @@ pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { vcageq_f64(b, a) } +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_imm3!(LANE1); + static_assert_imm3!(LANE2); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_imm4!(LANE1); + static_assert_imm4!(LANE2); + match LANE1 & 0b1111 { + 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn 
vcopy_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE1); + static_assert_imm2!(LANE2); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE1); + static_assert_imm3!(LANE2); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE1); + static_assert_imm2!(LANE2); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(LANE1); + static_assert_imm3!(LANE2); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 
=> simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_imm4!(LANE1); + static_assert_imm4!(LANE2); + match LANE1 & 0b1111 { + 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(LANE1); + static_assert_imm2!(LANE2); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE1); + static_assert_imm3!(LANE2); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, 
b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE1); + static_assert_imm2!(LANE2); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(LANE1); + static_assert_imm3!(LANE2); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_imm4!(LANE1); + static_assert_imm4!(LANE2); + match LANE1 & 0b1111 { + 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_imm2!(LANE1); + static_assert_imm2!(LANE2); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_imm3!(LANE1); + static_assert_imm3!(LANE2); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => 
unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE1); + static_assert_imm2!(LANE2); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + static_assert_imm1!(LANE1); + static_assert_imm1!(LANE2); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_s8(a: int8x8_t, b: int8x16_t) -> int8x8_t { + static_assert_imm3!(LANE1); + static_assert_imm4!(LANE2); + let a: int8x16_t = simd_shuffle16(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_imm2!(LANE1); + static_assert_imm3!(LANE2); + let a: int16x8_t = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, 
assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_imm1!(LANE1); + static_assert_imm2!(LANE2); + let a: int32x4_t = simd_shuffle4(a, a, [0, 1, 2, 3]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_u8(a: uint8x8_t, b: uint8x16_t) -> uint8x8_t { + static_assert_imm3!(LANE1); + static_assert_imm4!(LANE2); + let a: uint8x16_t = simd_shuffle16(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { + static_assert_imm2!(LANE1); + static_assert_imm3!(LANE2); + let a: uint16x8_t = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { + static_assert_imm1!(LANE1); + static_assert_imm2!(LANE2); + let a: uint32x4_t = simd_shuffle4(a, a, [0, 1, 2, 3]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_p8(a: poly8x8_t, b: poly8x16_t) -> poly8x8_t { + static_assert_imm3!(LANE1); + static_assert_imm4!(LANE2); + let a: poly8x16_t = simd_shuffle16(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 
7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_p16(a: poly16x4_t, b: poly16x8_t) -> poly16x4_t { + static_assert_imm2!(LANE1); + static_assert_imm3!(LANE2); + let a: poly16x8_t = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopy_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_imm1!(LANE1); + static_assert_imm2!(LANE2); + let a: float32x4_t = simd_shuffle4(a, a, [0, 1, 2, 3]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_s8(a: int8x16_t, b: int8x8_t) -> int8x16_t { + static_assert_imm4!(LANE1); + static_assert_imm3!(LANE2); + let b: int8x16_t = simd_shuffle16(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b1111 { + 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_imm3!(LANE1); + static_assert_imm2!(LANE2); + let b: int16x8_t = simd_shuffle8(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_imm2!(LANE1); + static_assert_imm1!(LANE2); + let b: int32x4_t = simd_shuffle4(b, b, [0, 1, 2, 3]); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { + static_assert_imm4!(LANE1); + static_assert_imm3!(LANE2); + let b: uint8x16_t = simd_shuffle16(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b1111 { + 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 
13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { + static_assert_imm3!(LANE1); + static_assert_imm2!(LANE2); + let b: uint16x8_t = simd_shuffle8(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { + static_assert_imm2!(LANE1); + static_assert_imm1!(LANE2); + let b: uint32x4_t = simd_shuffle4(b, b, [0, 1, 2, 3]); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_p8(a: poly8x16_t, b: poly8x8_t) -> poly8x16_t { + static_assert_imm4!(LANE1); + static_assert_imm3!(LANE2); + let b: poly8x16_t = simd_shuffle16(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b1111 { + 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16(a, b, [0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_p16(a: poly16x8_t, b: poly16x4_t) -> poly16x8_t { + static_assert_imm3!(LANE1); + static_assert_imm2!(LANE2); + let b: poly16x8_t = simd_shuffle8(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b111 { + 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_s64(a: int64x2_t, b: int64x1_t) -> int64x2_t { + static_assert_imm1!(LANE1); + static_assert!(LANE2 : i32 where LANE2 == 0); + let b: int64x2_t = simd_shuffle2(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_u64(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t { + static_assert_imm1!(LANE1); + static_assert!(LANE2 : i32 where LANE2 == 0); + let b: uint64x2_t = simd_shuffle2(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_p64(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t { + static_assert_imm1!(LANE1); + static_assert!(LANE2 : i32 where LANE2 == 0); + let b: poly64x2_t = simd_shuffle2(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +pub 
unsafe fn vcopyq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_imm2!(LANE1); + static_assert_imm1!(LANE2); + let b: float32x4_t = simd_shuffle4(b, b, [0, 1, 2, 3]); + match LANE1 & 0b11 { + 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +pub unsafe fn vcopyq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { + static_assert_imm1!(LANE1); + static_assert!(LANE2 : i32 where LANE2 == 0); + let b: float64x2_t = simd_shuffle2(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } +} + /// Floating-point convert to higher precision long #[inline] #[target_feature(enable = "neon")] @@ -7826,6 +8688,402 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); + let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vcopy_lane_s8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vcopyq_laneq_s8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); + let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); + let r: i16x4 = transmute(vcopy_lane_s16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vcopyq_laneq_s16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF); + let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2); + let r: i32x2 = transmute(vcopy_lane_s32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0); + let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4); + let r: i32x4 = transmute(vcopyq_laneq_s32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2); + let r: i64x2 = transmute(vcopyq_laneq_s64::<0, 1>(transmute(a), 
transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); + let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vcopy_lane_u8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vcopyq_laneq_u8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); + let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4); + let r: u16x4 = transmute(vcopy_lane_u16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vcopyq_laneq_u16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2); + let r: u32x2 = transmute(vcopy_lane_u32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4); + let r: u32x4 = transmute(vcopyq_laneq_u32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 2); + let r: u64x2 = transmute(vcopyq_laneq_u64::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_p8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); + let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vcopy_lane_p8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_p8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vcopyq_laneq_p8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_p16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); + let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); + let r: i16x4 = transmute(vcopy_lane_p16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_p16() { + 
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vcopyq_laneq_p16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_p64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2); + let r: i64x2 = transmute(vcopyq_laneq_p64::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_lane_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = f32x2::new(0., 0.5); + let e: f32x2 = f32x2::new(0.5, 2.); + let r: f32x2 = transmute(vcopy_lane_f32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_f32() { + let a: f32x4 = f32x4::new(1., 2., 3., 4.); + let b: f32x4 = f32x4::new(0., 0.5, 0., 0.); + let e: f32x4 = f32x4::new(0.5, 2., 3., 4.); + let r: f32x4 = transmute(vcopyq_laneq_f32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_laneq_f64() { + let a: f64x2 = f64x2::new(1., 2.); + let b: f64x2 = f64x2::new(0., 0.5); + let e: f64x2 = f64x2::new(0.5, 2.); + let r: f64x2 = transmute(vcopyq_laneq_f64::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vcopy_laneq_s8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); + let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); + let r: i16x4 = transmute(vcopy_laneq_s16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0); + let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2); + let r: i32x2 = transmute(vcopy_laneq_s32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vcopy_laneq_u8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4); + let r: u16x4 = transmute(vcopy_laneq_u16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2); + let r: u32x2 = transmute(vcopy_laneq_u32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_p8() { + 
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vcopy_laneq_p8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_p16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); + let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); + let r: i16x4 = transmute(vcopy_laneq_p16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopy_laneq_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x4 = f32x4::new(0., 0.5, 0., 0.); + let e: f32x2 = f32x2::new(0.5, 2.); + let r: f32x2 = transmute(vcopy_laneq_f32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); + let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vcopyq_lane_s8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); + let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vcopyq_lane_s16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF); + let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4); + let r: i32x4 = transmute(vcopyq_lane_s32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); + let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vcopyq_lane_u8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); + let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vcopyq_lane_u16::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4); + let r: u32x4 = transmute(vcopyq_lane_u32::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_p8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); + let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vcopyq_lane_p8::<0, 1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcopyq_lane_p16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x4 = 
i16x4::new(0, 0x7F_FF, 0, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: i16x8 = transmute(vcopyq_lane_p16::<0, 1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopyq_lane_s64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x2 = transmute(vcopyq_lane_s64::<1, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopyq_lane_u64() {
+        let a: u64x2 = u64x2::new(1, 2);
+        let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x2 = u64x2::new(1, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vcopyq_lane_u64::<1, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopyq_lane_p64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x2 = transmute(vcopyq_lane_p64::<1, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopyq_lane_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x2 = f32x2::new(0.5, 0.);
+        let e: f32x4 = f32x4::new(1., 0.5, 3., 4.);
+        let r: f32x4 = transmute(vcopyq_lane_f32::<1, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopyq_lane_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64 = 0.5;
+        let e: f64x2 = f64x2::new(1., 0.5);
+        let r: f64x2 = transmute(vcopyq_lane_f64::<1, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vcvt_f64_f32() {
         let a: f32x2 = f32x2::new(-1.2, 1.2);
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index 173003dff0..77a982f326 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -359,6 +359,118 @@ extern "C" {
     fn vsriq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
 }
 
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_lane_s64<const N1: i32, const N2: i32>(
+    _a: int64x1_t,
+    b: int64x1_t,
+) -> int64x1_t {
+    static_assert!(N1 : i32 where N1 == 0);
+    static_assert!(N2 : i32 where N2 == 0);
+    b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_lane_u64<const N1: i32, const N2: i32>(
+    _a: uint64x1_t,
+    b: uint64x1_t,
+) -> uint64x1_t {
+    static_assert!(N1 : i32 where N1 == 0);
+    static_assert!(N2 : i32 where N2 == 0);
+    b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_lane_p64<const N1: i32, const N2: i32>(
+    _a: poly64x1_t,
+    b: poly64x1_t,
+) -> poly64x1_t {
+    static_assert!(N1 : i32 where N1 == 0);
+    static_assert!(N2 : i32 where N2 == 0);
+    b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_lane_f64<const N1: i32, const N2: i32>(
+    _a: float64x1_t,
+    b: float64x1_t,
+) -> float64x1_t {
+    static_assert!(N1 : i32 where N1 == 0);
+    static_assert!(N2 : i32 where N2 == 0);
+    b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
+    _a: int64x1_t,
+    b: int64x2_t,
+) -> int64x1_t {
+    static_assert!(LANE1 : i32 where LANE1 == 0);
+    static_assert_imm1!(LANE2);
+    transmute::<i64, _>(simd_extract(b, LANE2 as u32))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
+    _a: uint64x1_t,
+    b: uint64x2_t,
+) -> uint64x1_t {
+    static_assert!(LANE1 : i32 where LANE1 == 0);
+    static_assert_imm1!(LANE2);
+    transmute::<u64, _>(simd_extract(b, LANE2 as u32))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
+    _a: poly64x1_t,
+    b: poly64x2_t,
+) -> poly64x1_t {
+    static_assert!(LANE1 : i32 where LANE1 == 0);
+    static_assert_imm1!(LANE2);
+    transmute::<u64, _>(simd_extract(b, LANE2 as u32))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+pub unsafe fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
+    _a: float64x1_t,
+    b: float64x2_t,
+) -> float64x1_t {
+    static_assert!(LANE1 : i32 where LANE1 == 0);
+    static_assert_imm1!(LANE2);
+    transmute::<f64, _>(simd_extract(b, LANE2 as u32))
+}
+
 /// Load multiple single-element structures to one, two, three, or four registers.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -3793,6 +3905,78 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_lane_s64() {
+        let a: i64x1 = i64x1::new(1);
+        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x1 = transmute(vcopy_lane_s64::<0, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_lane_u64() {
+        let a: u64x1 = u64x1::new(1);
+        let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vcopy_lane_u64::<0, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_lane_p64() {
+        let a: i64x1 = i64x1::new(1);
+        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x1 = transmute(vcopy_lane_p64::<0, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_lane_f64() {
+        let a: f64 = 1.;
+        let b: f64 = 0.;
+        let e: f64 = 0.;
+        let r: f64 = transmute(vcopy_lane_f64::<0, 0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_laneq_s64() {
+        let a: i64x1 = i64x1::new(1);
+        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x1 = transmute(vcopy_laneq_s64::<0, 1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_laneq_u64() {
+        let a: u64x1 = u64x1::new(1);
+        let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vcopy_laneq_u64::<0, 1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_laneq_p64() {
+        let a: i64x1 = i64x1::new(1);
+        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x1 = transmute(vcopy_laneq_p64::<0, 1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopy_laneq_f64() {
+        let a: f64 = 1.;
+        let b: f64x2 = f64x2::new(0., 0.5);
+        let e: f64 = 0.5;
+        let r: f64 = transmute(vcopy_laneq_f64::<0, 1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vceq_u64() {
         test_cmp_u64(
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index fcc61fb770..cf847a3d38 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -721,6 +721,124 @@ generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 arm = vacge.s
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 0:1
+validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = mov
+generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x2_t
+generate uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x2_t
+generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
+
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
+a = 1., 2., 3., 4.
+b = 0., 0.5, 0., 0.
+n = 0:1
+validate 0.5, 2., 3., 4.
+
+aarch64 = mov
+generate float32x2_t, float32x4_t, float64x2_t
+
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = simd_shuffle-in_len-noext, a:in_t, a, a, {asc-0-in_len}
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in_len-LANE2}
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 0:1
+validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = mov
+generate int8x8_t:int8x16_t:int8x8_t, int16x4_t:int16x8_t:int16x4_t, int32x2_t:int32x4_t:int32x2_t
+generate uint8x8_t:uint8x16_t:uint8x8_t, uint16x4_t:uint16x8_t:uint16x4_t, uint32x2_t:uint32x4_t:uint32x2_t
+generate poly8x8_t:poly8x16_t:poly8x8_t, poly16x4_t:poly16x8_t:poly16x4_t
+
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = simd_shuffle-in_len-noext, a:in_t, a, a, {asc-0-in_len}
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in_len-LANE2}
+a = 1., 2., 3., 4.
+b = 0., 0.5, 0., 0.
+n = 0:1
+validate 0.5, 2., 3., 4.
+
+aarch64 = mov
+generate float32x2_t:float32x4_t:float32x2_t
+
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len}
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 0:1
+validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = mov
+generate int8x16_t:int8x8_t:int8x16_t, int16x8_t:int16x4_t:int16x8_t, int32x4_t:int32x2_t:int32x4_t
+generate uint8x16_t:uint8x8_t:uint8x16_t, uint16x8_t:uint16x4_t:uint16x8_t, uint32x4_t:uint32x2_t:uint32x4_t
+generate poly8x16_t:poly8x8_t:poly8x16_t, poly16x8_t:poly16x4_t:poly16x8_t
+
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len}
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1:0
+validate 1, MAX
+
+aarch64 = zip1
+generate int64x2_t:int64x1_t:int64x2_t, uint64x2_t:uint64x1_t:uint64x2_t, poly64x2_t:poly64x1_t:poly64x2_t
+
+/// Insert vector element from another vector element
+name = vcopy
+lane-suffixes
+constn = LANE1:LANE2
+multi_fn = static_assert_imm-in0_exp_len-LANE1
+multi_fn = static_assert_imm-in_exp_len-LANE2
+multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len}
+multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2}
+a = 1., 2., 3., 4.
+b = 0.5, 0., 0., 0.
+n = 1:0
+validate 1., 0.5, 3., 4.
+
+aarch64 = mov
+generate float32x4_t:float32x2_t:float32x4_t
+aarch64 = zip1
+generate float64x2_t:float64x1_t:float64x2_t
+
 /// Floating-point convert to higher precision long
 name = vcvt
 double-suffixes
@@ -1036,7 +1154,7 @@ generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64
 name = vext
 constn = N
 multi_fn = static_assert_imm-out_exp_len-N
-multi_fn = matchn-out_exp_len, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
+multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
 a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
 b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
 n = HFLEN
@@ -1050,7 +1168,7 @@ generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
 name = vext
 constn = N
 multi_fn = static_assert_imm-out_exp_len-N
-multi_fn = matchn-out_exp_len, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
+multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
 a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
 b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
 n = HFLEN
@@ -1066,7 +1184,7 @@ generate int64x2_t, uint64x2_t
 name = vext
 constn = N
 multi_fn = static_assert_imm-out_exp_len-N
-multi_fn = matchn-out_exp_len, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
+multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len}
 a = 0., 2., 2., 3.
 b = 3., 4., 5., 6.,
 n = HFLEN
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
index d8326a3e2b..f659bab99b 100644
--- a/crates/stdarch-gen/src/main.rs
+++ b/crates/stdarch-gen/src/main.rs
@@ -949,21 +949,50 @@ fn gen_aarch64(
         String::new()
     };
     let const_declare = if let Some(constn) = constn {
-        format!(r#"<const {}: i32>"#, constn)
+        if constn.contains(":") {
+            let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect();
+            assert_eq!(constns.len(), 2);
+            format!(r#"<const {}: i32, const {}: i32>"#, constns[0], constns[1])
+        } else {
+            format!(r#"<const {}: i32>"#, constn)
+        }
     } else {
         String::new()
     };
     let const_assert = if let Some(constn) = constn {
-        format!(
-            r#", {} = {}"#,
-            constn,
-            map_val(in_t[1], current_tests[0].3.as_ref().unwrap())
-        )
+        if constn.contains(":") {
+            let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect();
+            let const_test = current_tests[0].3.as_ref().unwrap();
+            let const_tests: Vec<_> = const_test.split(':').map(|v| v.to_string()).collect();
+            assert_eq!(constns.len(), 2);
+            assert_eq!(const_tests.len(), 2);
+            format!(
+                r#", {} = {}, {} = {}"#,
+                constns[0],
+                map_val(in_t[1], &const_tests[0]),
+                constns[1],
+                map_val(in_t[1], &const_tests[1]),
+            )
+        } else {
+            format!(
+                r#", {} = {}"#,
+                constn,
+                map_val(in_t[1], current_tests[0].3.as_ref().unwrap())
+            )
+        }
     } else {
         String::new()
     };
-    let const_legacy = if constn.is_some() {
-        format!("\n#[rustc_legacy_const_generics({})]", para_num)
+    let const_legacy = if let Some(constn) = constn {
+        if constn.contains(":") {
+            format!(
+                "\n#[rustc_legacy_const_generics({}, {})]",
+                para_num - 1,
+                para_num + 1
+            )
+        } else {
+            format!("\n#[rustc_legacy_const_generics({})]", para_num)
+        }
     } else {
         String::new()
     };
@@ -1105,7 +1134,16 @@ fn gen_test(
     let c: Vec<String> = c.iter().take(len_in[2]).cloned().collect();
     let e: Vec<String> = e.iter().take(len_out).cloned().collect();
     let const_value = if let Some(constn) = n {
-        format!(r#"::<{}>"#, map_val(in_t[1], constn))
+        if constn.contains(":") {
+            let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect();
+            format!(
+                r#"::<{}, {}>"#,
+                map_val(in_t[1], &constns[0]),
+                map_val(in_t[1], &constns[1])
+            )
+        } else {
+            format!(r#"::<{}>"#, map_val(in_t[1], constn))
+        }
     } else {
         String::new()
     };
@@ -1739,11 +1777,41 @@ fn get_call(
         let len = match &*fn_format[2] {
             "out_len" => type_len(out_t),
             "in_len" => type_len(in_t[1]),
+            "in0_len" => type_len(in_t[0]),
             "halflen" => type_len(in_t[1]) / 2,
             _ => 0,
         };
         return asc(start, len);
     }
+    if fn_name.starts_with("ins") {
+        let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
+        let n = n.unwrap();
+        let len = match &*fn_format[1] {
+            "out_len" => type_len(out_t),
+            "in_len" => type_len(in_t[1]),
+            "in0_len" => type_len(in_t[0]),
+            _ => 0,
+        };
+        let offset = match &*fn_format[2] {
+            "out_len" => type_len(out_t),
+            "in_len" => type_len(in_t[1]),
+            "in0_len" => type_len(in_t[0]),
+            _ => 0,
+        };
+        let mut s = String::from("[");
+        for i in 0..len {
+            if i != 0 {
+                s.push_str(", ");
+            }
+            if i == n as usize {
+                s.push_str(&format!("{} + {} as u32", offset.to_string(), fn_format[3]));
+            } else {
+                s.push_str(&i.to_string());
+            }
+        }
+        s.push_str("]");
+        return s;
+    }
     if fn_name.starts_with("static_assert_imm") {
         let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
         let len = match &*fn_format[1] {
@@ -1751,6 +1819,8 @@ fn get_call(
             "out_bits_exp_len" => type_bits_exp_len(out_t),
             "in_exp_len" => type_exp_len(in_t[1]),
             "in_bits_exp_len" => type_bits_exp_len(in_t[1]),
+            "in0_exp_len" => type_exp_len(in_t[0]),
+            "in1_exp_len" => type_exp_len(in_t[1]),
             "in2_exp_len" => type_exp_len(in_t[2]),
             _ => 0,
         };
@@ -1796,9 +1866,10 @@ fn get_call(
         let len = match &*fn_format[1] {
             "out_exp_len" => type_exp_len(out_t),
             "in_exp_len" => type_exp_len(in_t[1]),
+            "in0_exp_len" => type_exp_len(in_t[0]),
             _ => 0,
         };
-        let mut call = format!("match N & 0b{} {{\n", "1".repeat(len));
+        let mut call = format!("match {} & 0b{} {{\n", &fn_format[2], "1".repeat(len));
         let mut sub_call = String::new();
         for p in 1..params.len() {
             if !sub_call.is_empty() {
@@ -1946,6 +2017,8 @@ fn get_call(
         } else if fn_format[1] == "nosuffix" {
         } else if fn_format[1] == "in_len" {
             fn_name.push_str(&type_len(in_t[1]).to_string());
+        } else if fn_format[1] == "in0_len" {
+            fn_name.push_str(&type_len(in_t[0]).to_string());
         } else if fn_format[1] == "out_len" {
             fn_name.push_str(&type_len(out_t).to_string());
         } else if fn_format[1] == "halflen" {