Skip to content

Commit a44ef76

Browse files
committed
Add _mm_ucomi*_ss instructions SSE
They all compile down to the same x86 instruction, UCOMISS, whereas the _mm_comi*_ss instructions compile down to COMISS. The outputs of both sets of instructions are exactly the same; the only difference is in exception handling. I therefore added a single test case which checks the different effect of _mm_comieq_ss vs. _mm_ucomieq_ss on the MXCSR register (_mm_getcsr). Together with the tests that check the right instruction is emitted, no further tests are needed for the other variants.
1 parent ee266f7 commit a44ef76

File tree

1 file changed

+238
-0
lines changed

1 file changed

+238
-0
lines changed

src/x86/sse.rs

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,70 @@ pub unsafe fn _mm_comineq_ss(a: f32x4, b: f32x4) -> i32 {
352352
comineq_ss(a, b)
353353
}
354354

355+
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
356+
/// `1` if they are equal, or `0` otherwise. This instruction will not signal
357+
/// an exception if either argument is a quiet NaN.
358+
#[inline(always)]
359+
#[target_feature = "+sse"]
360+
#[cfg_attr(test, assert_instr(ucomiss))]
361+
pub unsafe fn _mm_ucomieq_ss(a: f32x4, b: f32x4) -> i32 {
362+
ucomieq_ss(a, b)
363+
}
364+
365+
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
366+
/// `1` if the value from `a` is less than the one from `b`, or `0` otherwise.
367+
/// This instruction will not signal an exception if either argument is a quiet
368+
/// NaN.
369+
#[inline(always)]
370+
#[target_feature = "+sse"]
371+
#[cfg_attr(test, assert_instr(ucomiss))]
372+
pub unsafe fn _mm_ucomilt_ss(a: f32x4, b: f32x4) -> i32 {
373+
ucomilt_ss(a, b)
374+
}
375+
376+
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
377+
/// `1` if the value from `a` is less than or equal to the one from `b`, or `0`
378+
/// otherwise. This instruction will not signal an exception if either argument
379+
/// is a quiet NaN.
380+
#[inline(always)]
381+
#[target_feature = "+sse"]
382+
#[cfg_attr(test, assert_instr(ucomiss))]
383+
pub unsafe fn _mm_ucomile_ss(a: f32x4, b: f32x4) -> i32 {
384+
ucomile_ss(a, b)
385+
}
386+
387+
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
388+
/// `1` if the value from `a` is greater than the one from `b`, or `0`
389+
/// otherwise. This instruction will not signal an exception if either argument
390+
/// is a quiet NaN.
391+
#[inline(always)]
392+
#[target_feature = "+sse"]
393+
#[cfg_attr(test, assert_instr(ucomiss))]
394+
pub unsafe fn _mm_ucomigt_ss(a: f32x4, b: f32x4) -> i32 {
395+
ucomigt_ss(a, b)
396+
}
397+
398+
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
399+
/// `1` if the value from `a` is greater than or equal to the one from `b`, or
400+
/// `0` otherwise. This instruction will not signal an exception if either
401+
/// argument is a quiet NaN.
402+
#[inline(always)]
403+
#[target_feature = "+sse"]
404+
#[cfg_attr(test, assert_instr(ucomiss))]
405+
pub unsafe fn _mm_ucomige_ss(a: f32x4, b: f32x4) -> i32 {
406+
ucomige_ss(a, b)
407+
}
408+
409+
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
410+
/// `1` if they are *not* equal, or `0` otherwise. This instruction will not
411+
/// signal an exception if either argument is a quiet NaN.
412+
#[inline(always)]
413+
#[target_feature = "+sse"]
414+
#[cfg_attr(test, assert_instr(ucomiss))]
415+
pub unsafe fn _mm_ucomineq_ss(a: f32x4, b: f32x4) -> i32 {
416+
ucomineq_ss(a, b)
417+
}
418+
355419
/// Construct a `f32x4` with the lowest element set to `a` and the rest set to
356420
/// zero.
357421
#[inline(always)]
@@ -1098,6 +1162,18 @@ extern {
10981162
fn comige_ss(a: f32x4, b: f32x4) -> i32;
10991163
#[link_name = "llvm.x86.sse.comineq.ss"]
11001164
fn comineq_ss(a: f32x4, b: f32x4) -> i32;
1165+
#[link_name = "llvm.x86.sse.ucomieq.ss"]
1166+
fn ucomieq_ss(a: f32x4, b: f32x4) -> i32;
1167+
#[link_name = "llvm.x86.sse.ucomilt.ss"]
1168+
fn ucomilt_ss(a: f32x4, b: f32x4) -> i32;
1169+
#[link_name = "llvm.x86.sse.ucomile.ss"]
1170+
fn ucomile_ss(a: f32x4, b: f32x4) -> i32;
1171+
#[link_name = "llvm.x86.sse.ucomigt.ss"]
1172+
fn ucomigt_ss(a: f32x4, b: f32x4) -> i32;
1173+
#[link_name = "llvm.x86.sse.ucomige.ss"]
1174+
fn ucomige_ss(a: f32x4, b: f32x4) -> i32;
1175+
#[link_name = "llvm.x86.sse.ucomineq.ss"]
1176+
fn ucomineq_ss(a: f32x4, b: f32x4) -> i32;
11011177
#[link_name = "llvm.x86.sse.sfence"]
11021178
fn sfence();
11031179
#[link_name = "llvm.x86.sse.stmxcsr"]
@@ -1565,6 +1641,168 @@ mod tests {
15651641
}
15661642
}
15671643

1644+
#[simd_test = "sse"]
1645+
unsafe fn _mm_ucomieq_ss() {
1646+
use std::f32::NAN;
1647+
1648+
let aa = &[3.0f32, 12.0, 23.0, NAN];
1649+
let bb = &[3.0f32, 47.5, 1.5, NAN];
1650+
1651+
let ee = &[1i32, 0, 0, 0];
1652+
1653+
for i in 0..4 {
1654+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1655+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1656+
1657+
let r = sse::_mm_ucomieq_ss(a, b);
1658+
1659+
assert_eq!(ee[i], r,
1660+
"_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
1661+
a, b, r, ee[i], i);
1662+
}
1663+
}
1664+
1665+
#[simd_test = "sse"]
1666+
unsafe fn _mm_ucomilt_ss() {
1667+
use std::f32::NAN;
1668+
1669+
let aa = &[3.0f32, 12.0, 23.0, NAN];
1670+
let bb = &[3.0f32, 47.5, 1.5, NAN];
1671+
1672+
let ee = &[0i32, 1, 0, 0];
1673+
1674+
for i in 0..4 {
1675+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1676+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1677+
1678+
let r = sse::_mm_ucomilt_ss(a, b);
1679+
1680+
assert_eq!(ee[i], r,
1681+
"_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
1682+
a, b, r, ee[i], i);
1683+
}
1684+
}
1685+
1686+
#[simd_test = "sse"]
1687+
unsafe fn _mm_ucomile_ss() {
1688+
use std::f32::NAN;
1689+
1690+
let aa = &[3.0f32, 12.0, 23.0, NAN];
1691+
let bb = &[3.0f32, 47.5, 1.5, NAN];
1692+
1693+
let ee = &[1i32, 1, 0, 0];
1694+
1695+
for i in 0..4 {
1696+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1697+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1698+
1699+
let r = sse::_mm_ucomile_ss(a, b);
1700+
1701+
assert_eq!(ee[i], r,
1702+
"_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
1703+
a, b, r, ee[i], i);
1704+
}
1705+
}
1706+
1707+
#[simd_test = "sse"]
1708+
unsafe fn _mm_ucomigt_ss() {
1709+
use std::f32::NAN;
1710+
1711+
let aa = &[3.0f32, 12.0, 23.0, NAN];
1712+
let bb = &[3.0f32, 47.5, 1.5, NAN];
1713+
1714+
let ee = &[0i32, 0, 1, 0];
1715+
1716+
for i in 0..4 {
1717+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1718+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1719+
1720+
let r = sse::_mm_ucomigt_ss(a, b);
1721+
1722+
assert_eq!(ee[i], r,
1723+
"_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
1724+
a, b, r, ee[i], i);
1725+
}
1726+
}
1727+
1728+
#[simd_test = "sse"]
1729+
unsafe fn _mm_ucomige_ss() {
1730+
use std::f32::NAN;
1731+
1732+
let aa = &[3.0f32, 12.0, 23.0, NAN];
1733+
let bb = &[3.0f32, 47.5, 1.5, NAN];
1734+
1735+
let ee = &[1i32, 0, 1, 0];
1736+
1737+
for i in 0..4 {
1738+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1739+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1740+
1741+
let r = sse::_mm_ucomige_ss(a, b);
1742+
1743+
assert_eq!(ee[i], r,
1744+
"_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
1745+
a, b, r, ee[i], i);
1746+
}
1747+
}
1748+
1749+
#[simd_test = "sse"]
1750+
unsafe fn _mm_ucomineq_ss() {
1751+
use std::f32::NAN;
1752+
1753+
let aa = &[3.0f32, 12.0, 23.0, NAN];
1754+
let bb = &[3.0f32, 47.5, 1.5, NAN];
1755+
1756+
let ee = &[0i32, 1, 1, 1];
1757+
1758+
for i in 0..4 {
1759+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1760+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1761+
1762+
let r = sse::_mm_ucomineq_ss(a, b);
1763+
1764+
assert_eq!(ee[i], r,
1765+
"_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
1766+
a, b, r, ee[i], i);
1767+
}
1768+
}
1769+
1770+
#[simd_test = "sse"]
1771+
unsafe fn _mm_comieq_ss_vs_ucomieq_ss() {
1772+
// If one of the arguments is a quiet NaN `comieq_ss` should signal an
1773+
// Invalid Operation Exception while `ucomieq_ss` should not.
1774+
use std::f32::NAN; // This is a quiet NaN.
1775+
let aa = &[3.0f32, NAN, 23.0, NAN];
1776+
let bb = &[3.0f32, 47.5, NAN, NAN];
1777+
1778+
let ee = &[1i32, 0, 0, 0];
1779+
let exc = &[0u32, 1, 1, 1]; // Should comieq_ss signal an exception?
1780+
1781+
for i in 0..4 {
1782+
let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
1783+
let b = f32x4::new(bb[i], 0.0, 2.0, 4.0);
1784+
1785+
sse::_MM_SET_EXCEPTION_STATE(0);
1786+
let r1 = sse::_mm_comieq_ss(a, b);
1787+
let s1 = sse::_MM_GET_EXCEPTION_STATE();
1788+
1789+
sse::_MM_SET_EXCEPTION_STATE(0);
1790+
let r2 = sse::_mm_ucomieq_ss(a, b);
1791+
let s2 = sse::_MM_GET_EXCEPTION_STATE();
1792+
1793+
assert_eq!(ee[i], r1,
1794+
"_mm_comeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
1795+
a, b, r1, ee[i], i);
1796+
assert_eq!(ee[i], r2,
1797+
"_mm_ucomeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
1798+
a, b, r2, ee[i], i);
1799+
assert_eq!(s1, exc[i] * sse::_MM_EXCEPT_INVALID,
1800+
"_mm_comieq_ss() set exception flags: {} (i={})", s1, i);
1801+
assert_eq!(s2, 0, // ucomieq_ss should not signal an exception
1802+
"_mm_ucomieq_ss() set exception flags: {} (i={})", s2, i);
1803+
}
1804+
}
1805+
15681806
#[simd_test = "sse"]
15691807
unsafe fn _mm_set_ss() {
15701808
let r = sse::_mm_set_ss(black_box(4.25));

0 commit comments

Comments
 (0)