Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit c934f5f

Browse files
committed Mar 1, 2021
convert x86::sse41 intrinsics to const generics
1 parent cfc519a commit c934f5f

File tree

2 files changed

+98
-168
lines changed

2 files changed

+98
-168
lines changed
 

‎crates/core_arch/src/x86/avx2.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4415,14 +4415,14 @@ mod tests {
44154415

44164416
#[simd_test(enable = "avx2")]
44174417
unsafe fn test_mm_broadcastb_epi8() {
4418-
let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
4418+
let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
44194419
let res = _mm_broadcastb_epi8(a);
44204420
assert_eq_m128i(res, _mm_set1_epi8(0x2a));
44214421
}
44224422

44234423
#[simd_test(enable = "avx2")]
44244424
unsafe fn test_mm256_broadcastb_epi8() {
4425-
let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
4425+
let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
44264426
let res = _mm256_broadcastb_epi8(a);
44274427
assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
44284428
}
@@ -5204,7 +5204,7 @@ mod tests {
52045204
#[simd_test(enable = "avx2")]
52055205
unsafe fn test_mm256_sll_epi32() {
52065206
let a = _mm256_set1_epi32(0xFFFF);
5207-
let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
5207+
let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
52085208
let r = _mm256_sll_epi32(a, b);
52095209
assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
52105210
}
@@ -5295,7 +5295,7 @@ mod tests {
52955295
#[simd_test(enable = "avx2")]
52965296
unsafe fn test_mm256_sra_epi32() {
52975297
let a = _mm256_set1_epi32(-1);
5298-
let b = _mm_insert_epi32(_mm_set1_epi32(0), 1, 0);
5298+
let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
52995299
let r = _mm256_sra_epi32(a, b);
53005300
assert_eq_m256i(r, _mm256_set1_epi32(-1));
53015301
}
@@ -5365,7 +5365,7 @@ mod tests {
53655365
#[simd_test(enable = "avx2")]
53665366
unsafe fn test_mm256_srl_epi32() {
53675367
let a = _mm256_set1_epi32(0xFFFF);
5368-
let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
5368+
let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
53695369
let r = _mm256_srl_epi32(a, b);
53705370
assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
53715371
}

‎crates/core_arch/src/x86/sse41.rs

Lines changed: 93 additions & 163 deletions
Original file line number | Diff line number | Diff line change
@@ -78,17 +78,11 @@ pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i
7878
// see https://bugs.llvm.org/show_bug.cgi?id=38195
7979
// #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
8080
#[cfg_attr(test, assert_instr(blendps, imm8 = 0xF0))]
81-
#[rustc_args_required_const(2)]
81+
#[rustc_legacy_const_generics(2)]
8282
#[stable(feature = "simd_x86", since = "1.27.0")]
83-
pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
84-
let a = a.as_i16x8();
85-
let b = b.as_i16x8();
86-
macro_rules! call {
87-
($imm8:expr) => {
88-
pblendw(a, b, $imm8)
89-
};
90-
}
91-
transmute(constify_imm8!(imm8, call))
83+
pub unsafe fn _mm_blend_epi16<const imm8: i32>(a: __m128i, b: __m128i) -> __m128i {
84+
static_assert_imm8!(imm8);
85+
transmute(pblendw(a.as_i16x8(), b.as_i16x8(), imm8 as u8))
9286
}
9387

9488
/// Blend packed double-precision (64-bit) floating-point elements from `a`
@@ -125,15 +119,11 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
125119
// see https://bugs.llvm.org/show_bug.cgi?id=38195
126120
// #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
127121
#[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))]
128-
#[rustc_args_required_const(2)]
122+
#[rustc_legacy_const_generics(2)]
129123
#[stable(feature = "simd_x86", since = "1.27.0")]
130-
pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
131-
macro_rules! call {
132-
($imm2:expr) => {
133-
blendpd(a, b, $imm2)
134-
};
135-
}
136-
constify_imm2!(imm2, call)
124+
pub unsafe fn _mm_blend_pd<const imm2: i32>(a: __m128d, b: __m128d) -> __m128d {
125+
static_assert_imm2!(imm2);
126+
blendpd(a, b, imm2 as u8)
137127
}
138128

139129
/// Blend packed single-precision (32-bit) floating-point elements from `a`
@@ -143,15 +133,11 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
143133
#[inline]
144134
#[target_feature(enable = "sse4.1")]
145135
#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
146-
#[rustc_args_required_const(2)]
136+
#[rustc_legacy_const_generics(2)]
147137
#[stable(feature = "simd_x86", since = "1.27.0")]
148-
pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
149-
macro_rules! call {
150-
($imm4:expr) => {
151-
blendps(a, b, $imm4)
152-
};
153-
}
154-
constify_imm4!(imm4, call)
138+
pub unsafe fn _mm_blend_ps<const imm4: i32>(a: __m128, b: __m128) -> __m128 {
139+
static_assert_imm4!(imm4);
140+
blendps(a, b, imm4 as u8)
155141
}
156142

157143
/// Extracts a single-precision (32-bit) floating-point element from `a`,
@@ -164,15 +150,11 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
164150
all(test, not(target_os = "windows")),
165151
assert_instr(extractps, imm8 = 0)
166152
)]
167-
#[rustc_args_required_const(1)]
153+
#[rustc_legacy_const_generics(1)]
168154
#[stable(feature = "simd_x86", since = "1.27.0")]
169-
pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
170-
macro_rules! call {
171-
($imm2:expr) => {
172-
transmute(simd_extract::<_, f32>(a, $imm2))
173-
};
174-
}
175-
constify_imm2!(imm8, call)
155+
pub unsafe fn _mm_extract_ps<const imm8: i32>(a: __m128) -> i32 {
156+
static_assert_imm2!(imm8);
157+
transmute(simd_extract::<_, f32>(a, imm8 as u32))
176158
}
177159

178160
/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
@@ -184,16 +166,11 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
184166
#[inline]
185167
#[target_feature(enable = "sse4.1")]
186168
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
187-
#[rustc_args_required_const(1)]
169+
#[rustc_legacy_const_generics(1)]
188170
#[stable(feature = "simd_x86", since = "1.27.0")]
189-
pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
190-
let a = a.as_u8x16();
191-
macro_rules! call {
192-
($imm4:expr) => {
193-
simd_extract::<_, u8>(a, $imm4) as i32
194-
};
195-
}
196-
constify_imm4!(imm8, call)
171+
pub unsafe fn _mm_extract_epi8<const imm8: i32>(a: __m128i) -> i32 {
172+
static_assert_imm4!(imm8);
173+
simd_extract::<_, u8>(a.as_u8x16(), imm8 as u32) as i32
197174
}
198175

199176
/// Extracts a 32-bit integer from `a` selected with `imm8`
@@ -205,16 +182,11 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
205182
all(test, not(target_os = "windows")),
206183
assert_instr(extractps, imm8 = 1)
207184
)]
208-
#[rustc_args_required_const(1)]
185+
#[rustc_legacy_const_generics(1)]
209186
#[stable(feature = "simd_x86", since = "1.27.0")]
210-
pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
211-
let a = a.as_i32x4();
212-
macro_rules! call {
213-
($imm2:expr) => {
214-
simd_extract::<_, i32>(a, $imm2)
215-
};
216-
}
217-
constify_imm2!(imm8, call)
187+
pub unsafe fn _mm_extract_epi32<const imm8: i32>(a: __m128i) -> i32 {
188+
static_assert_imm2!(imm8);
189+
simd_extract::<_, i32>(a.as_i32x4(), imm8 as u32)
218190
}
219191

220192
/// Select a single value in `a` to store at some position in `b`,
@@ -244,15 +216,11 @@ pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
244216
#[inline]
245217
#[target_feature(enable = "sse4.1")]
246218
#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
247-
#[rustc_args_required_const(2)]
219+
#[rustc_legacy_const_generics(2)]
248220
#[stable(feature = "simd_x86", since = "1.27.0")]
249-
pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
250-
macro_rules! call {
251-
($imm8:expr) => {
252-
insertps(a, b, $imm8)
253-
};
254-
}
255-
constify_imm8!(imm8, call)
221+
pub unsafe fn _mm_insert_ps<const imm8: i32>(a: __m128, b: __m128) -> __m128 {
222+
static_assert_imm8!(imm8);
223+
insertps(a, b, imm8 as u8)
256224
}
257225

258226
/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
@@ -262,16 +230,11 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
262230
#[inline]
263231
#[target_feature(enable = "sse4.1")]
264232
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
265-
#[rustc_args_required_const(2)]
233+
#[rustc_legacy_const_generics(2)]
266234
#[stable(feature = "simd_x86", since = "1.27.0")]
267-
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
268-
let a = a.as_i8x16();
269-
macro_rules! call {
270-
($imm4:expr) => {
271-
transmute(simd_insert(a, $imm4, i as i8))
272-
};
273-
}
274-
constify_imm4!(imm8, call)
235+
pub unsafe fn _mm_insert_epi8<const imm8: i32>(a: __m128i, i: i32) -> __m128i {
236+
static_assert_imm4!(imm8);
237+
transmute(simd_insert(a.as_i8x16(), imm8 as u32, i as i8))
275238
}
276239

277240
/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
@@ -281,16 +244,11 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
281244
#[inline]
282245
#[target_feature(enable = "sse4.1")]
283246
#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
284-
#[rustc_args_required_const(2)]
247+
#[rustc_legacy_const_generics(2)]
285248
#[stable(feature = "simd_x86", since = "1.27.0")]
286-
pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
287-
let a = a.as_i32x4();
288-
macro_rules! call {
289-
($imm2:expr) => {
290-
transmute(simd_insert(a, $imm2, i))
291-
};
292-
}
293-
constify_imm2!(imm8, call)
249+
pub unsafe fn _mm_insert_epi32<const imm8: i32>(a: __m128i, i: i32) -> __m128i {
250+
static_assert_imm2!(imm8);
251+
transmute(simd_insert(a.as_i32x4(), imm8 as u32, i))
294252
}
295253

296254
/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
@@ -584,15 +542,11 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
584542
#[inline]
585543
#[target_feature(enable = "sse4.1")]
586544
#[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
587-
#[rustc_args_required_const(2)]
545+
#[rustc_legacy_const_generics(2)]
588546
#[stable(feature = "simd_x86", since = "1.27.0")]
589-
pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
590-
macro_rules! call {
591-
($imm8:expr) => {
592-
dppd(a, b, $imm8)
593-
};
594-
}
595-
constify_imm8!(imm8, call)
547+
pub unsafe fn _mm_dp_pd<const imm8: i32>(a: __m128d, b: __m128d) -> __m128d {
548+
static_assert_imm8!(imm8);
549+
dppd(a, b, imm8 as u8)
596550
}
597551

598552
/// Returns the dot product of two __m128 vectors.
@@ -607,15 +561,11 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
607561
#[inline]
608562
#[target_feature(enable = "sse4.1")]
609563
#[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
610-
#[rustc_args_required_const(2)]
564+
#[rustc_legacy_const_generics(2)]
611565
#[stable(feature = "simd_x86", since = "1.27.0")]
612-
pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
613-
macro_rules! call {
614-
($imm8:expr) => {
615-
dpps(a, b, $imm8)
616-
};
617-
}
618-
constify_imm8!(imm8, call)
566+
pub unsafe fn _mm_dp_ps<const imm8: i32>(a: __m128, b: __m128) -> __m128 {
567+
static_assert_imm8!(imm8);
568+
dpps(a, b, imm8 as u8)
619569
}
620570

621571
/// Round the packed double-precision (64-bit) floating-point elements in `a`
@@ -764,15 +714,11 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
764714
#[inline]
765715
#[target_feature(enable = "sse4.1")]
766716
#[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
767-
#[rustc_args_required_const(1)]
717+
#[rustc_legacy_const_generics(1)]
768718
#[stable(feature = "simd_x86", since = "1.27.0")]
769-
pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
770-
macro_rules! call {
771-
($imm4:expr) => {
772-
roundpd(a, $imm4)
773-
};
774-
}
775-
constify_imm4!(rounding, call)
719+
pub unsafe fn _mm_round_pd<const rounding: i32>(a: __m128d) -> __m128d {
720+
static_assert_imm4!(rounding);
721+
roundpd(a, rounding)
776722
}
777723

778724
/// Round the packed single-precision (32-bit) floating-point elements in `a`
@@ -809,15 +755,11 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
809755
#[inline]
810756
#[target_feature(enable = "sse4.1")]
811757
#[cfg_attr(test, assert_instr(roundps, rounding = 0))]
812-
#[rustc_args_required_const(1)]
758+
#[rustc_legacy_const_generics(1)]
813759
#[stable(feature = "simd_x86", since = "1.27.0")]
814-
pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
815-
macro_rules! call {
816-
($imm4:expr) => {
817-
roundps(a, $imm4)
818-
};
819-
}
820-
constify_imm4!(rounding, call)
760+
pub unsafe fn _mm_round_ps<const rounding: i32>(a: __m128) -> __m128 {
761+
static_assert_imm4!(rounding);
762+
roundps(a, rounding)
821763
}
822764

823765
/// Round the lower double-precision (64-bit) floating-point element in `b`
@@ -856,15 +798,11 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
856798
#[inline]
857799
#[target_feature(enable = "sse4.1")]
858800
#[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
859-
#[rustc_args_required_const(2)]
801+
#[rustc_legacy_const_generics(2)]
860802
#[stable(feature = "simd_x86", since = "1.27.0")]
861-
pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
862-
macro_rules! call {
863-
($imm4:expr) => {
864-
roundsd(a, b, $imm4)
865-
};
866-
}
867-
constify_imm4!(rounding, call)
803+
pub unsafe fn _mm_round_sd<const rounding: i32>(a: __m128d, b: __m128d) -> __m128d {
804+
static_assert_imm4!(rounding);
805+
roundsd(a, b, rounding)
868806
}
869807

870808
/// Round the lower single-precision (32-bit) floating-point element in `b`
@@ -903,15 +841,11 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
903841
#[inline]
904842
#[target_feature(enable = "sse4.1")]
905843
#[cfg_attr(test, assert_instr(roundss, rounding = 0))]
906-
#[rustc_args_required_const(2)]
844+
#[rustc_legacy_const_generics(2)]
907845
#[stable(feature = "simd_x86", since = "1.27.0")]
908-
pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
909-
macro_rules! call {
910-
($imm4:expr) => {
911-
roundss(a, b, $imm4)
912-
};
913-
}
914-
constify_imm4!(rounding, call)
846+
pub unsafe fn _mm_round_ss<const rounding: i32>(a: __m128, b: __m128) -> __m128 {
847+
static_assert_imm4!(rounding);
848+
roundss(a, b, rounding)
915849
}
916850

917851
/// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector,
@@ -1007,17 +941,11 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
1007941
#[inline]
1008942
#[target_feature(enable = "sse4.1")]
1009943
#[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
1010-
#[rustc_args_required_const(2)]
944+
#[rustc_legacy_const_generics(2)]
1011945
#[stable(feature = "simd_x86", since = "1.27.0")]
1012-
pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1013-
let a = a.as_u8x16();
1014-
let b = b.as_u8x16();
1015-
macro_rules! call {
1016-
($imm8:expr) => {
1017-
mpsadbw(a, b, $imm8)
1018-
};
1019-
}
1020-
transmute(constify_imm3!(imm8, call))
946+
pub unsafe fn _mm_mpsadbw_epu8<const imm8: i32>(a: __m128i, b: __m128i) -> __m128i {
947+
static_assert_imm3!(imm8);
948+
transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), imm8 as u8))
1021949
}
1022950

1023951
/// Tests whether the specified bits in a 128-bit integer vector are all
@@ -1270,7 +1198,7 @@ mod tests {
12701198
unsafe fn test_mm_blend_pd() {
12711199
let a = _mm_set1_pd(0.0);
12721200
let b = _mm_set1_pd(1.0);
1273-
let r = _mm_blend_pd(a, b, 0b10);
1201+
let r = _mm_blend_pd::<0b10>(a, b);
12741202
let e = _mm_setr_pd(0.0, 1.0);
12751203
assert_eq_m128d(r, e);
12761204
}
@@ -1279,7 +1207,7 @@ mod tests {
12791207
unsafe fn test_mm_blend_ps() {
12801208
let a = _mm_set1_ps(0.0);
12811209
let b = _mm_set1_ps(1.0);
1282-
let r = _mm_blend_ps(a, b, 0b1010);
1210+
let r = _mm_blend_ps::<0b1010>(a, b);
12831211
let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
12841212
assert_eq_m128(r, e);
12851213
}
@@ -1288,18 +1216,18 @@ mod tests {
12881216
unsafe fn test_mm_blend_epi16() {
12891217
let a = _mm_set1_epi16(0);
12901218
let b = _mm_set1_epi16(1);
1291-
let r = _mm_blend_epi16(a, b, 0b1010_1100);
1219+
let r = _mm_blend_epi16::<0b1010_1100>(a, b);
12921220
let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
12931221
assert_eq_m128i(r, e);
12941222
}
12951223

12961224
#[simd_test(enable = "sse4.1")]
12971225
unsafe fn test_mm_extract_ps() {
12981226
let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
1299-
let r: f32 = transmute(_mm_extract_ps(a, 1));
1300-
assert_eq!(r, 1.0);
1301-
let r: f32 = transmute(_mm_extract_ps(a, 5));
1227+
let r: f32 = transmute(_mm_extract_ps::<1>(a));
13021228
assert_eq!(r, 1.0);
1229+
let r: f32 = transmute(_mm_extract_ps::<3>(a));
1230+
assert_eq!(r, 3.0);
13031231
}
13041232

13051233
#[simd_test(enable = "sse4.1")]
@@ -1309,26 +1237,26 @@ mod tests {
13091237
-1, 1, 2, 3, 4, 5, 6, 7,
13101238
8, 9, 10, 11, 12, 13, 14, 15
13111239
);
1312-
let r1 = _mm_extract_epi8(a, 0);
1313-
let r2 = _mm_extract_epi8(a, 19);
1240+
let r1 = _mm_extract_epi8::<0>(a);
1241+
let r2 = _mm_extract_epi8::<3>(a);
13141242
assert_eq!(r1, 0xFF);
13151243
assert_eq!(r2, 3);
13161244
}
13171245

13181246
#[simd_test(enable = "sse4.1")]
13191247
unsafe fn test_mm_extract_epi32() {
13201248
let a = _mm_setr_epi32(0, 1, 2, 3);
1321-
let r = _mm_extract_epi32(a, 1);
1322-
assert_eq!(r, 1);
1323-
let r = _mm_extract_epi32(a, 5);
1249+
let r = _mm_extract_epi32::<1>(a);
13241250
assert_eq!(r, 1);
1251+
let r = _mm_extract_epi32::<3>(a);
1252+
assert_eq!(r, 3);
13251253
}
13261254

13271255
#[simd_test(enable = "sse4.1")]
13281256
unsafe fn test_mm_insert_ps() {
13291257
let a = _mm_set1_ps(1.0);
13301258
let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1331-
let r = _mm_insert_ps(a, b, 0b11_00_1100);
1259+
let r = _mm_insert_ps::<0b11_00_1100>(a, b);
13321260
let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
13331261
assert_eq_m128(r, e);
13341262
}
@@ -1337,19 +1265,21 @@ mod tests {
13371265
unsafe fn test_mm_insert_epi8() {
13381266
let a = _mm_set1_epi8(0);
13391267
let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1340-
let r = _mm_insert_epi8(a, 32, 1);
1268+
let r = _mm_insert_epi8::<1>(a, 32);
13411269
assert_eq_m128i(r, e);
1342-
let r = _mm_insert_epi8(a, 32, 17);
1270+
let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1271+
let r = _mm_insert_epi8::<14>(a, 32);
13431272
assert_eq_m128i(r, e);
13441273
}
13451274

13461275
#[simd_test(enable = "sse4.1")]
13471276
unsafe fn test_mm_insert_epi32() {
13481277
let a = _mm_set1_epi32(0);
13491278
let e = _mm_setr_epi32(0, 32, 0, 0);
1350-
let r = _mm_insert_epi32(a, 32, 1);
1279+
let r = _mm_insert_epi32::<1>(a, 32);
13511280
assert_eq_m128i(r, e);
1352-
let r = _mm_insert_epi32(a, 32, 5);
1281+
let e = _mm_setr_epi32(0, 0, 0, 32);
1282+
let r = _mm_insert_epi32::<3>(a, 32);
13531283
assert_eq_m128i(r, e);
13541284
}
13551285

@@ -1622,15 +1552,15 @@ mod tests {
16221552
let a = _mm_setr_pd(2.0, 3.0);
16231553
let b = _mm_setr_pd(1.0, 4.0);
16241554
let e = _mm_setr_pd(14.0, 0.0);
1625-
assert_eq_m128d(_mm_dp_pd(a, b, 0b00110001), e);
1555+
assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
16261556
}
16271557

16281558
#[simd_test(enable = "sse4.1")]
16291559
unsafe fn test_mm_dp_ps() {
16301560
let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
16311561
let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
16321562
let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
1633-
assert_eq_m128(_mm_dp_ps(a, b, 0b01110101), e);
1563+
assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
16341564
}
16351565

16361566
#[simd_test(enable = "sse4.1")]
@@ -1704,15 +1634,15 @@ mod tests {
17041634
#[simd_test(enable = "sse4.1")]
17051635
unsafe fn test_mm_round_pd() {
17061636
let a = _mm_setr_pd(1.25, 3.75);
1707-
let r = _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT);
1637+
let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
17081638
let e = _mm_setr_pd(1.0, 4.0);
17091639
assert_eq_m128d(r, e);
17101640
}
17111641

17121642
#[simd_test(enable = "sse4.1")]
17131643
unsafe fn test_mm_round_ps() {
17141644
let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
1715-
let r = _mm_round_ps(a, _MM_FROUND_TO_ZERO);
1645+
let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
17161646
let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
17171647
assert_eq_m128(r, e);
17181648
}
@@ -1723,7 +1653,7 @@ mod tests {
17231653
let b = _mm_setr_pd(-2.5, -4.5);
17241654
let old_mode = _MM_GET_ROUNDING_MODE();
17251655
_MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
1726-
let r = _mm_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
1656+
let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
17271657
_MM_SET_ROUNDING_MODE(old_mode);
17281658
let e = _mm_setr_pd(-2.0, 3.5);
17291659
assert_eq_m128d(r, e);
@@ -1735,7 +1665,7 @@ mod tests {
17351665
let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
17361666
let old_mode = _MM_GET_ROUNDING_MODE();
17371667
_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
1738-
let r = _mm_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
1668+
let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
17391669
_MM_SET_ROUNDING_MODE(old_mode);
17401670
let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
17411671
assert_eq_m128(r, e);
@@ -1815,23 +1745,23 @@ mod tests {
18151745
8, 9, 10, 11, 12, 13, 14, 15,
18161746
);
18171747

1818-
let r = _mm_mpsadbw_epu8(a, a, 0b000);
1748+
let r = _mm_mpsadbw_epu8::<0b000>(a, a);
18191749
let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
18201750
assert_eq_m128i(r, e);
18211751

1822-
let r = _mm_mpsadbw_epu8(a, a, 0b001);
1752+
let r = _mm_mpsadbw_epu8::<0b001>(a, a);
18231753
let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
18241754
assert_eq_m128i(r, e);
18251755

1826-
let r = _mm_mpsadbw_epu8(a, a, 0b100);
1756+
let r = _mm_mpsadbw_epu8::<0b100>(a, a);
18271757
let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
18281758
assert_eq_m128i(r, e);
18291759

1830-
let r = _mm_mpsadbw_epu8(a, a, 0b101);
1760+
let r = _mm_mpsadbw_epu8::<0b101>(a, a);
18311761
let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
18321762
assert_eq_m128i(r, e);
18331763

1834-
let r = _mm_mpsadbw_epu8(a, a, 0b111);
1764+
let r = _mm_mpsadbw_epu8::<0b111>(a, a);
18351765
let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
18361766
assert_eq_m128i(r, e);
18371767
}

0 commit comments

Comments
 (0)
Please sign in to comment.