Skip to content

Commit c96b603

Browse files
committed
Merge remote-tracking branch 'up/master' into sse4.1
2 parents 22af7b2 + 4d7d2f1 commit c96b603

File tree

3 files changed

+167
-4
lines changed

3 files changed

+167
-4
lines changed

src/x86/sse2.rs

+40
Original file line numberDiff line numberDiff line change
@@ -2056,6 +2056,30 @@ pub unsafe fn _mm_undefined_si128() -> __m128i {
20562056
mem::transmute(i32x4::splat(mem::uninitialized()))
20572057
}
20582058

2059+
/// The resulting `f64x2` element is composed of the high-order values of
2060+
/// the two `f64x2` interleaved input elements, i.e.:
2061+
///
2062+
/// * The [127:64] bits are copied from the [127:64] bits of the second input
2063+
/// * The [63:0] bits are copied from the [127:64] bits of the first input
2064+
#[inline(always)]
2065+
#[target_feature = "+sse2"]
2066+
#[cfg_attr(test, assert_instr(unpckhpd))]
2067+
pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 {
2068+
simd_shuffle2(a, b, [1, 3])
2069+
}
2070+
2071+
/// The resulting `f64x2` element is composed of the low-order values of
2072+
/// the two `f64x2` interleaved input elements, i.e.:
2073+
///
2074+
/// * The [127:64] bits are copied from the [63:0] bits of the second input
2075+
/// * The [63:0] bits are copied from the [63:0] bits of the first input
2076+
#[inline(always)]
2077+
#[target_feature = "+sse2"]
2078+
#[cfg_attr(test, assert_instr(unpcklpd))]
2079+
pub unsafe fn _mm_unpacklo_pd(a: f64x2, b: f64x2) -> f64x2 {
2080+
simd_shuffle2(a, b, [0, 2])
2081+
}
2082+
20592083
#[allow(improper_ctypes)]
20602084
extern "C" {
20612085
#[link_name = "llvm.x86.sse2.pause"]
@@ -4174,4 +4198,20 @@ mod tests {
41744198
let r = sse2::_mm_load_pd1(&d);
41754199
assert_eq!(r, f64x2::new(d, d));
41764200
}
4201+
4202+
#[simd_test = "sse2"]
4203+
unsafe fn _mm_unpackhi_pd() {
4204+
let a = f64x2::new(1.0, 2.0);
4205+
let b = f64x2::new(3.0, 4.0);
4206+
let r = sse2::_mm_unpackhi_pd(a, b);
4207+
assert_eq!(r, f64x2::new(2.0, 4.0));
4208+
}
4209+
4210+
#[simd_test = "sse2"]
4211+
unsafe fn _mm_unpacklo_pd() {
4212+
let a = f64x2::new(1.0, 2.0);
4213+
let b = f64x2::new(3.0, 4.0);
4214+
let r = sse2::_mm_unpacklo_pd(a, b);
4215+
assert_eq!(r, f64x2::new(1.0, 3.0));
4216+
}
41774217
}

src/x86/sse41.rs

+123-1
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
208208
a.replace((imm8 & 0b1) as u32, i)
209209
}
210210

211-
/// Compare packed 8-bit integers in `a` and `b`,87 and return packed maximum
211+
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
212212
/// values in dst.
213213
#[inline(always)]
214214
#[target_feature = "+sse4.1"]
@@ -244,6 +244,42 @@ pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
244244
pmaxud(a, b)
245245
}
246246

247+
/// Compare packed 8-bit integers in `a` and `b` and return packed minimum
248+
/// values in dst.
249+
#[inline(always)]
250+
#[target_feature = "+sse4.1"]
251+
#[cfg_attr(test, assert_instr(pminsb))]
252+
pub unsafe fn _mm_min_epi8(a: i8x16, b: i8x16) -> i8x16 {
253+
pminsb(a, b)
254+
}
255+
256+
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
257+
/// minimum.
258+
#[inline(always)]
259+
#[target_feature = "+sse4.1"]
260+
#[cfg_attr(test, assert_instr(pminuw))]
261+
pub unsafe fn _mm_min_epu16(a: u16x8, b: u16x8) -> u16x8 {
262+
pminuw(a, b)
263+
}
264+
265+
/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum
266+
/// values.
267+
#[inline(always)]
268+
#[target_feature = "+sse4.1"]
269+
#[cfg_attr(test, assert_instr(pminsd))]
270+
pub unsafe fn _mm_min_epi32(a: i32x4, b: i32x4) -> i32x4 {
271+
pminsd(a, b)
272+
}
273+
274+
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
275+
/// minimum values.
276+
#[inline(always)]
277+
#[target_feature = "+sse4.1"]
278+
#[cfg_attr(test, assert_instr(pminud))]
279+
pub unsafe fn _mm_min_epu32(a: u32x4, b: u32x4) -> u32x4 {
280+
pminud(a, b)
281+
}
282+
247283
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
248284
/// using unsigned saturation
249285
#[inline(always)]
@@ -828,6 +864,14 @@ extern "C" {
828864
fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
829865
#[link_name = "llvm.x86.sse41.pmaxud"]
830866
fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
867+
#[link_name = "llvm.x86.sse41.pminsb"]
868+
fn pminsb(a: i8x16, b: i8x16) -> i8x16;
869+
#[link_name = "llvm.x86.sse41.pminuw"]
870+
fn pminuw(a: u16x8, b: u16x8) -> u16x8;
871+
#[link_name = "llvm.x86.sse41.pminsd"]
872+
fn pminsd(a: i32x4, b: i32x4) -> i32x4;
873+
#[link_name = "llvm.x86.sse41.pminud"]
874+
fn pminud(a: u32x4, b: u32x4) -> u32x4;
831875
#[link_name = "llvm.x86.sse41.packusdw"]
832876
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
833877
#[link_name = "llvm.x86.sse41.pmuldq"]
@@ -1056,6 +1100,84 @@ mod tests {
10561100
assert_eq!(r, e);
10571101
}
10581102

1103+
#[simd_test = "sse4.1"]
1104+
unsafe fn _mm_min_epi8_1() {
1105+
#[cfg_attr(rustfmt, rustfmt_skip)]
1106+
let a = i8x16::new(
1107+
1, 4, 5, 8, 9, 12, 13, 16,
1108+
17, 20, 21, 24, 25, 28, 29, 32,
1109+
);
1110+
#[cfg_attr(rustfmt, rustfmt_skip)]
1111+
let b = i8x16::new(
1112+
2, 3, 6, 7, 10, 11, 14, 15,
1113+
18, 19, 22, 23, 26, 27, 30, 31,
1114+
);
1115+
let r = sse41::_mm_min_epi8(a, b);
1116+
#[cfg_attr(rustfmt, rustfmt_skip)]
1117+
let e = i8x16::new(
1118+
1, 3, 5, 7, 9, 11, 13, 15,
1119+
17, 19, 21, 23, 25, 27, 29, 31,
1120+
);
1121+
assert_eq!(r, e);
1122+
}
1123+
1124+
#[simd_test = "sse4.1"]
1125+
unsafe fn _mm_min_epi8_2() {
1126+
#[cfg_attr(rustfmt, rustfmt_skip)]
1127+
let a = i8x16::new(
1128+
1, -4, -5, 8, -9, -12, 13, -16,
1129+
17, 20, 21, 24, 25, 28, 29, 32,
1130+
);
1131+
#[cfg_attr(rustfmt, rustfmt_skip)]
1132+
let b = i8x16::new(
1133+
2, -3, -6, 7, -10, -11, 14, -15,
1134+
18, 19, 22, 23, 26, 27, 30, 31,
1135+
);
1136+
let r = sse41::_mm_min_epi8(a, b);
1137+
#[cfg_attr(rustfmt, rustfmt_skip)]
1138+
let e = i8x16::new(
1139+
1, -4, -6, 7, -10, -12, 13, -16,
1140+
17, 19, 21, 23, 25, 27, 29, 31,
1141+
);
1142+
assert_eq!(r, e);
1143+
}
1144+
1145+
#[simd_test = "sse4.1"]
1146+
unsafe fn _mm_min_epu16() {
1147+
let a = u16x8::new(1, 4, 5, 8, 9, 12, 13, 16);
1148+
let b = u16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
1149+
let r = sse41::_mm_min_epu16(a, b);
1150+
let e = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
1151+
assert_eq!(r, e);
1152+
}
1153+
1154+
#[simd_test = "sse4.1"]
1155+
unsafe fn _mm_min_epi32_1() {
1156+
let a = i32x4::new(1, 4, 5, 8);
1157+
let b = i32x4::new(2, 3, 6, 7);
1158+
let r = sse41::_mm_min_epi32(a, b);
1159+
let e = i32x4::new(1, 3, 5, 7);
1160+
assert_eq!(r, e);
1161+
}
1162+
1163+
#[simd_test = "sse4.1"]
1164+
unsafe fn _mm_min_epi32_2() {
1165+
let a = i32x4::new(-1, 4, 5, -7);
1166+
let b = i32x4::new(-2, 3, -6, 8);
1167+
let r = sse41::_mm_min_epi32(a, b);
1168+
let e = i32x4::new(-2, 3, -6, -7);
1169+
assert_eq!(r, e);
1170+
}
1171+
1172+
#[simd_test = "sse4.1"]
1173+
unsafe fn _mm_min_epu32() {
1174+
let a = u32x4::new(1, 4, 5, 8);
1175+
let b = u32x4::new(2, 3, 6, 7);
1176+
let r = sse41::_mm_min_epu32(a, b);
1177+
let e = u32x4::new(1, 3, 5, 7);
1178+
assert_eq!(r, e);
1179+
}
1180+
10591181
#[simd_test = "sse4.1"]
10601182
unsafe fn _mm_packus_epi32() {
10611183
let a = i32x4::new(1, 2, 3, 4);

stdsimd-test/assert-instr-macro/src/lib.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ pub fn assert_instr(
2828
.expect("expected #[assert_instr(instr, a = b, ...)]");
2929
let item =
3030
syn::parse::<syn::Item>(item).expect("must be attached to an item");
31-
let func = match item.node {
32-
syn::ItemKind::Fn(ref f) => f,
31+
let func = match item {
32+
syn::Item::Fn(ref f) => f,
3333
_ => panic!("must be attached to a function"),
3434
};
3535

@@ -70,7 +70,8 @@ pub fn assert_instr(
7070
}
7171
};
7272
}
73-
let attrs = item.attrs
73+
74+
let attrs = func.attrs
7475
.iter()
7576
.filter(|attr| {
7677
attr.path

0 commit comments

Comments
 (0)