Skip to content

Commit 2c6746f

Browse files
committed
algo: implement sine/cosine using Intel SVML for single/double precision
- Add sin/cos functions with ≤1 ULP and ≤4 ULP accuracy variants - Support large argument ranges with proper range reduction - Standalone implementation doesn't require libm - Written using Google Highway for SIMD optimization - Optimize for both float32 and float64 data types
1 parent 7dbd9b6 commit 2c6746f

File tree

8 files changed

+26617
-0
lines changed

8 files changed

+26617
-0
lines changed

npsr/common.h

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#ifndef NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_
2+
#define NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_
3+
4+
#include "hwy/highway.h"
5+
6+
#include <cfenv>
7+
#include <type_traits>
8+
9+
namespace npsr {
10+
11+
struct _NoLargeArgument {};
12+
struct _NoSpecialCases {};
13+
struct _NoExceptions {};
14+
struct _LowAccuracy {};
15+
constexpr auto kNoLargeArgument = _NoLargeArgument{};
16+
constexpr auto kNoSpecialCases = _NoSpecialCases{};
17+
constexpr auto kNoExceptions = _NoExceptions{};
18+
constexpr auto kLowAccuracy = _LowAccuracy{};
19+
20+
struct Round {
21+
struct _Force {};
22+
struct _Nearest {};
23+
struct _Down {};
24+
struct _Up {};
25+
struct _Zero {};
26+
static constexpr auto kForce = _Force{};
27+
static constexpr auto kNearest = _Nearest{};
28+
#if 0 // not used yet
29+
static constexpr auto kDown = _Down{};
30+
static constexpr auto kUp = _Up{};
31+
static constexpr auto kZero = _Zero{};
32+
#endif
33+
};
34+
35+
struct Subnormal {
36+
struct _DAZ {};
37+
struct _FTZ {};
38+
struct _IEEE754 {};
39+
#if 0 // not used yet
40+
static constexpr auto kDAZ = _DAZ{};
41+
static constexpr auto kFTZ = _FTZ{};
42+
#endif
43+
static constexpr auto kIEEE754 = _IEEE754{};
44+
};
45+
46+
struct FPExceptions {
47+
static constexpr auto kNone = 0;
48+
static constexpr auto kInvalid = FE_INVALID;
49+
static constexpr auto kDivByZero = FE_DIVBYZERO;
50+
static constexpr auto kOverflow = FE_OVERFLOW;
51+
static constexpr auto kUnderflow = FE_UNDERFLOW;
52+
};
53+
54+
template <typename... Args> class Precise {
55+
public:
56+
Precise() {
57+
if constexpr (!kNoExceptions) {
58+
fegetexceptflag(&_exceptions, FE_ALL_EXCEPT);
59+
}
60+
if constexpr (kRoundForce) {
61+
_rounding_mode = fegetround();
62+
int new_mode = _NewRoundingMode();
63+
if (_rounding_mode != new_mode) {
64+
_retrieve_rounding_mode = true;
65+
fesetround(new_mode);
66+
}
67+
}
68+
}
69+
void FlushExceptions() { fesetexceptflag(&_exceptions, FE_ALL_EXCEPT); }
70+
71+
void Raise(int errors) {
72+
static_assert(!kNoExceptions,
73+
"Cannot raise exceptions in NoExceptions mode");
74+
_exceptions |= errors;
75+
}
76+
~Precise() {
77+
FlushExceptions();
78+
if constexpr (kRoundForce) {
79+
if (_retrieve_rounding_mode) {
80+
fesetround(_rounding_mode);
81+
}
82+
}
83+
}
84+
static constexpr bool kNoExceptions =
85+
(std::is_same_v<_NoExceptions, Args> || ...);
86+
static constexpr bool kNoLargeArgument =
87+
(std::is_same_v<_NoLargeArgument, Args> || ...);
88+
static constexpr bool kNoSpecialCases =
89+
(std::is_same_v<_NoSpecialCases, Args> || ...);
90+
static constexpr bool kLowAccuracy =
91+
(std::is_same_v<_LowAccuracy, Args> || ...);
92+
// defaults to high accuracy if no low accuracy flag is set
93+
static constexpr bool kHighAccuracy = !kLowAccuracy;
94+
// defaults to large argument support if no no large argument flag is set
95+
static constexpr bool kLargeArgument = !kNoLargeArgument;
96+
// defaults to special cases support if no no special cases flag is set
97+
static constexpr bool kSpecialCases = !kNoSpecialCases;
98+
// defaults to exception support if no no exception flag is set
99+
static constexpr bool kException = !kNoExceptions;
100+
101+
static constexpr bool kRoundForce =
102+
(std::is_same_v<Round::_Force, Args> || ...);
103+
static constexpr bool _kRoundNearest =
104+
(std::is_same_v<Round::_Nearest, Args> || ...);
105+
static constexpr bool kRoundZero =
106+
(std::is_same_v<Round::_Zero, Args> || ...);
107+
static constexpr bool kRoundDown =
108+
(std::is_same_v<Round::_Down, Args> || ...);
109+
static constexpr bool kRoundUp = (std::is_same_v<Round::_Up, Args> || ...);
110+
// only one rounding mode can be set
111+
static_assert((_kRoundNearest + kRoundDown + kRoundUp + kRoundZero) <= 1,
112+
"Only one rounding mode can be set at a time");
113+
// if no rounding mode is set, default to round nearest
114+
static constexpr bool kRoundNearest =
115+
_kRoundNearest || (!kRoundDown && !kRoundUp && !kRoundZero);
116+
117+
static constexpr bool kDAZ = (std::is_same_v<Subnormal::_DAZ, Args> || ...);
118+
static constexpr bool kFTZ = (std::is_same_v<Subnormal::_FTZ, Args> || ...);
119+
static constexpr bool _kIEEE754 =
120+
(std::is_same_v<Subnormal::_IEEE754, Args> || ...);
121+
static_assert(!_kIEEE754 || !(kDAZ || kFTZ),
122+
"IEEE754 mode cannot be used "
123+
"with Denormals Are Zero (DAZ) or Flush To Zero (FTZ) "
124+
"subnormal handling");
125+
static constexpr bool kIEEE754 = _kIEEE754 || !(kDAZ || kFTZ);
126+
127+
private:
128+
int _NewRoundingMode() const {
129+
if constexpr (kRoundDown) {
130+
return FE_DOWNWARD;
131+
} else if constexpr (kRoundUp) {
132+
return FE_UPWARD;
133+
} else if constexpr (kRoundZero) {
134+
return FE_TOWARDZERO;
135+
} else {
136+
return FE_TONEAREST;
137+
}
138+
}
139+
int _rounding_mode = 0;
140+
bool _retrieve_rounding_mode = false;
141+
fexcept_t _exceptions;
142+
};
143+
144+
} // namespace npsr
145+
146+
#endif // NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_

npsr/npsr.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// To include them once per target, which is ensured by the toggle check.
2+
// clang-format off
3+
#if defined(_NPSR_NPSR_H_) == defined(HWY_TARGET_TOGGLE) // NOLINT
4+
#ifdef _NPSR_NPSR_H_
5+
#undef _NPSR_NPSR_H_
6+
#else
7+
#define _NPSR_NPSR_H_
8+
#endif
9+
10+
#include "npsr/trig/inl.h"
11+
12+
#endif // _NPSR_NPSR_H_

npsr/trig/inl.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#include "npsr/common.h"
2+
3+
#include "npsr/sincos/large-inl.h"
4+
#include "npsr/sincos/small-inl.h"
5+
6+
// clang-format off
7+
#if defined(NPSR_TRIG_INL_H_) == defined(HWY_TARGET_TOGGLE) // NOLINT
8+
#ifdef NPSR_TRIG_INL_H_
9+
#undef NPSR_TRIG_INL_H_
10+
#else
11+
#define NPSR_TRIG_INL_H_
12+
#endif
13+
14+
HWY_BEFORE_NAMESPACE();
15+
16+
namespace npsr::HWY_NAMESPACE::sincos {
17+
template <bool IS_COS, typename V, typename Prec>
18+
HWY_API V SinCos(Prec &prec, V x) {
19+
using namespace hwy::HWY_NAMESPACE;
20+
constexpr bool kIsSingle = std::is_same_v<TFromV<V>, float>;
21+
const DFromV<V> d;
22+
V ret;
23+
if constexpr (Prec::kLowAccuracy) {
24+
ret = SmallArgLow<IS_COS>(x);
25+
}
26+
else {
27+
ret = SmallArg<IS_COS>(x);
28+
}
29+
if constexpr (Prec::kLargeArgument) {
30+
// Identify inputs requiring extended precision (very large arguments)
31+
auto has_large_arg = Gt(Abs(x), Set(d, kIsSingle ? 10000.0 : 16777216.0));
32+
if (HWY_UNLIKELY(!AllFalse(d, has_large_arg))) {
33+
// Use extended precision algorithm for large arguments
34+
ret = IfThenElse(has_large_arg, LargeArg<IS_COS>(x), ret);
35+
}
36+
}
37+
if constexpr (Prec::kSpecialCases || Prec::kException) {
38+
auto is_finite = IsFinite(x);
39+
ret = IfThenElse(is_finite, ret, NaN(d));
40+
if constexpr (Prec::kException) {
41+
prec.Raise(!AllFalse(d, IsInf(x)) ? FPExceptions::kInvalid : 0);
42+
}
43+
}
44+
return ret;
45+
}
46+
} // namespace npsr::HWY_NAMESPACE::sincos
47+
48+
namespace npsr::HWY_NAMESPACE {
49+
template <typename V, typename Prec>
50+
HWY_API V Sin(Prec &prec, V x) {
51+
return sincos::SinCos<false>(prec, x);
52+
}
53+
template <typename V, typename Prec>
54+
HWY_API V Cos(Prec &prec, V x) {
55+
return sincos::SinCos<true>(prec, x);
56+
}
57+
} // namespace npsr::HWY_NAMESPACE
58+
59+
HWY_AFTER_NAMESPACE();
60+
61+
#endif // NPSR_TRIG_INL_H_

0 commit comments

Comments
 (0)