File tree 5 files changed +51
-18
lines changed 5 files changed +51
-18
lines changed Original file line number Diff line number Diff line change @@ -20,8 +20,8 @@ CONFIGURE_FILE(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h")
20
20
CONFIGURE_FILE (cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR} /cuda/CUDAConfig.h" )
21
21
22
22
# NB: If you edit these globs, you'll have to update setup.py package_data as well
23
- FILE (GLOB base_h "*.h" "detail/*.h" )
24
- FILE (GLOB base_cpp "*.cpp" "detail/*.cpp" )
23
+ FILE (GLOB base_h "*.h" "detail/*.h" "cpu/*.h" )
24
+ FILE (GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" )
25
25
add_subdirectory (core)
26
26
FILE (GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh" )
27
27
FILE (GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp" )
Original file line number Diff line number Diff line change 13
13
#include " ATen/CPUGenerator.h"
14
14
#include " ATen/RegisterCPU.h"
15
15
#include " ATen/Tensor.h"
16
+ #include < ATen/cpu/FlushDenormal.h>
16
17
17
18
#include " TH/TH.h" // for USE_LAPACK
18
19
19
- #ifdef USE_SSE3
20
- #include < pmmintrin.h>
21
- #endif
22
-
23
20
namespace at {
24
21
25
22
static inline void errorHandler (const char * msg, void * data) {
@@ -94,18 +91,7 @@ bool Context::hasLAPACK() const {
94
91
}
95
92
96
93
bool Context::setFlushDenormal (bool on) {
97
- #ifdef USE_SSE3
98
- // Setting flush-to-zero (FTZ) flag
99
- _MM_SET_FLUSH_ZERO_MODE (on ? _MM_FLUSH_ZERO_ON
100
- : _MM_FLUSH_ZERO_OFF);
101
-
102
- // Setting denormals-are-zero (DAZ) flag
103
- _MM_SET_DENORMALS_ZERO_MODE (on ? _MM_DENORMALS_ZERO_ON
104
- : _MM_DENORMALS_ZERO_OFF);
105
- return true ;
106
- #else
107
- return false ;
108
- #endif
94
+ return at::cpu::set_flush_denormal (on);
109
95
}
110
96
111
97
TypeExtendedInterface& getType (TensorOptions options) {
Original file line number Diff line number Diff line change
1
+ #include < ATen/cpu/FlushDenormal.h>
2
+
3
+ #include < ATen/cpu/vec256/intrinsics.h>
4
+ #include < cpuinfo.h>
5
+
6
+ namespace at { namespace cpu {
7
+
8
+ static constexpr unsigned int DENORMALS_ZERO = 0x0040 ;
9
+ static constexpr unsigned int FLUSH_ZERO = 0x8000 ;
10
+
11
+ bool set_flush_denormal (bool on) {
12
+ // Compile if we have SSE support (GCC), x86-64 (MSVC), or x86 with SSE (MSVC)
13
+ #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
14
+ // Denormals-Are-Zero is supported by most SSE2 processors, with the exception
15
+ // of some early Pentium 4 processors. We guard it with a runtime check.
16
+ // Flush-To-Zero (FTZ) only requires SSE.
17
+ if (cpuinfo_has_x86_daz ()) {
18
+ unsigned int csr = _mm_getcsr ();
19
+ csr &= ~DENORMALS_ZERO;
20
+ csr &= ~FLUSH_ZERO;
21
+ if (on) {
22
+ csr |= DENORMALS_ZERO;
23
+ csr |= FLUSH_ZERO;
24
+ }
25
+ _mm_setcsr (csr);
26
+ return true ;
27
+ }
28
+ #endif
29
+ return false ;
30
+ }
31
+
32
+ }} // namespace at::cpu
Original file line number Diff line number Diff line change
1
+ /// Flush-To-Zero and Denormals-Are-Zero mode
2
+ ///
3
+ /// Flush-To-Zero (FTZ) and Denormals-Are-Zero (DAZ) are modes that bypass
4
+ /// IEEE 754 methods of dealing with denormal floating-point numbers on x86-64
5
+ /// and some x86 CPUs. They result in reduced precision for values near zero,
6
+ /// but increased performance.
7
+ ///
8
+ /// See https://software.intel.com/en-us/articles/x87-and-sse-floating-point-assists-in-ia-32-flush-to-zero-ftz-and-denormals-are-zero-daz
9
+
10
+ namespace at { namespace cpu {
11
+
12
+ bool set_flush_denormal (bool on);
13
+
14
+ }} // namespace at::cpu
Original file line number Diff line number Diff line change @@ -1195,6 +1195,7 @@ def make_relative_rpath(path):
1195
1195
'lib/torch_shm_manager' ,
1196
1196
'lib/*.h' ,
1197
1197
'lib/include/ATen/*.h' ,
1198
+ 'lib/include/ATen/cpu/*.h' ,
1198
1199
'lib/include/ATen/core/*.h' ,
1199
1200
'lib/include/ATen/cuda/*.cuh' ,
1200
1201
'lib/include/ATen/cuda/*.h' ,
You can’t perform that action at this time.
0 commit comments