@@ -583,26 +583,66 @@ pub unsafe fn _mm256_cvtepi8_epi32(a: i8x16) -> i32x8 {
    simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
}

- /// An i8x4 type is pretty useless, but we need it as an intermediate type in
- /// _mm256_cvtepi8_epi64.
- #[repr(simd)]
- #[allow(non_camel_case_types)]
- struct i8x4(i8, i8, i8, i8);
-
/// Sign-extend 8-bit integers to 64-bit integers.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_cvtepi8_epi64(a: i8x16) -> i64x4 {
-     simd_cast::<i8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
+     simd_cast::<::v32::i8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
+ }
+
+ /// Zero-extend unsigned 16-bit integers in `a` to 32-bit integers.
+ #[inline(always)]
+ #[target_feature = "+avx2"]
+ #[cfg_attr(test, assert_instr(vpmovzxwd))]
+ pub unsafe fn _mm256_cvtepu16_epi32(a: u16x8) -> i32x8 {
+     simd_cast(a)
+ }
+
+ /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
+ /// integers. The upper four elements of `a` are unused.
+ #[inline(always)]
+ #[target_feature = "+avx2"]
+ #[cfg_attr(test, assert_instr(vpmovzxwq))]
+ pub unsafe fn _mm256_cvtepu16_epi64(a: u16x8) -> i64x4 {
+     simd_cast::<::v64::u16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
+ }
+
+ /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
+ #[inline(always)]
+ #[target_feature = "+avx2"]
+ #[cfg_attr(test, assert_instr(vpmovzxdq))]
+ pub unsafe fn _mm256_cvtepu32_epi64(a: u32x4) -> i64x4 {
+     simd_cast(a)
+ }
+
+ /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
+ #[inline(always)]
+ #[target_feature = "+avx2"]
+ #[cfg_attr(test, assert_instr(vpmovzxbw))]
+ pub unsafe fn _mm256_cvtepu8_epi16(a: u8x16) -> i16x16 {
+     simd_cast(a)
+ }
+
+ /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
+ /// integers. The upper eight elements of `a` are unused.
+ #[inline(always)]
+ #[target_feature = "+avx2"]
+ #[cfg_attr(test, assert_instr(vpmovzxbd))]
+ pub unsafe fn _mm256_cvtepu8_epi32(a: u8x16) -> i32x8 {
+     simd_cast::<::v64::u8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
+ }
+
+ /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
+ /// integers. The upper twelve elements of `a` are unused.
+ #[inline(always)]
+ #[target_feature = "+avx2"]
+ #[cfg_attr(test, assert_instr(vpmovzxbq))]
+ pub unsafe fn _mm256_cvtepu8_epi64(a: u8x16) -> i64x4 {
+     simd_cast::<::v32::u8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
}

- // TODO _mm256_cvtepu16_epi32
- // TODO _mm256_cvtepu16_epi64
- // TODO _mm256_cvtepu32_epi64
- // TODO _mm256_cvtepu8_epi16
- // TODO _mm256_cvtepu8_epi32
- // TODO _mm256_cvtepu8_epi64
// TODO _m128i _mm256_extracti128_si256

/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`.
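Note (not part of the diff): each of the narrower conversions added above uses the same shuffle-then-cast pattern, selecting the low lanes into one of the crate's small vector types (`::v32::i8x4`, `::v32::u8x4`, `::v64::u16x4`, `::v64::u8x8`) and letting `simd_cast` widen them, which is why the local `i8x4` struct is no longer needed. As a rough scalar model of what one of them computes, illustrative only:

    // Hypothetical plain-Rust model of _mm256_cvtepu8_epi64: take the low four
    // u8 lanes and zero-extend each to i64 (an `as` cast from an unsigned type
    // is a zero-extension).
    fn cvtepu8_epi64_model(a: [u8; 16]) -> [i64; 4] {
        [a[0] as i64, a[1] as i64, a[2] as i64, a[3] as i64]
    }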
@@ -2738,6 +2778,52 @@ mod tests {
        assert_eq!(r, avx2::_mm256_cvtepi32_epi64(a));
    }

+     #[simd_test = "avx2"]
+     unsafe fn _mm256_cvtepu16_epi32() {
+         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+         let r = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+         assert_eq!(r, avx2::_mm256_cvtepu16_epi32(a));
+     }
+
+     #[simd_test = "avx2"]
+     unsafe fn _mm256_cvtepu16_epi64() {
+         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+         let r = i64x4::new(0, 1, 2, 3);
+         assert_eq!(r, avx2::_mm256_cvtepu16_epi64(a));
+     }
+
+     #[simd_test = "avx2"]
+     unsafe fn _mm256_cvtepu32_epi64() {
+         let a = u32x4::new(0, 1, 2, 3);
+         let r = i64x4::new(0, 1, 2, 3);
+         assert_eq!(r, avx2::_mm256_cvtepu32_epi64(a));
+     }
+
+     #[simd_test = "avx2"]
+     unsafe fn _mm256_cvtepu8_epi16() {
+         let a =
+             u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+         let r =
+             i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+         assert_eq!(r, avx2::_mm256_cvtepu8_epi16(a));
+     }
+
+     #[simd_test = "avx2"]
+     unsafe fn _mm256_cvtepu8_epi32() {
+         let a =
+             u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+         let r = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+         assert_eq!(r, avx2::_mm256_cvtepu8_epi32(a));
+     }
+
+     #[simd_test = "avx2"]
+     unsafe fn _mm256_cvtepu8_epi64() {
+         let a =
+             u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+         let r = i64x4::new(0, 1, 2, 3);
+         assert_eq!(r, avx2::_mm256_cvtepu8_epi64(a));
+     }
+
    #[simd_test = "avx2"]
    unsafe fn _mm256_hadd_epi16() {
        let a = i16x16::splat(2);
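One limitation of the new tests worth noting: inputs 0 through 15 produce the same result whether the conversion sign- or zero-extends, so they cannot distinguish the `vpmovzx*` intrinsics from their `vpmovsx*` counterparts. A possible extra assertion, sketched here with the same vector constructors as the tests above and not part of this change, would use a lane with the high bit set:

    // 0x80 zero-extends to 128; sign-extending the same bit pattern would give -128.
    let a = u8x16::new(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let r = i16x16::new(128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(r, avx2::_mm256_cvtepu8_epi16(a));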