File tree Expand file tree Collapse file tree 2 files changed +34
-0
lines changed Expand file tree Collapse file tree 2 files changed +34
-0
lines changed Original file line number Diff line number Diff line change
1
+ #ifndef _cuda_compat_h
2
+ #define _cuda_compat_h
3
+
4
+ // atomicAdd for half types, to support CC < 7.x
5
+
6
+ __device__ __forceinline__ void atomicAdd_half (half * address , half val )
7
+ {
8
+ unsigned int * address_as_ui = (unsigned int * ) ((char * )address - ((size_t )address & 2 ));
9
+ unsigned int old = * address_as_ui ;
10
+ unsigned int assumed ;
11
+
12
+ do
13
+ {
14
+ assumed = old ;
15
+ __half_raw hsum ;
16
+ hsum .x = (size_t )address & 2 ? (old >> 16 ) : (old & 0xffff );
17
+ half tmpres = __hadd (hsum , val );
18
+ hsum = __half_raw (tmpres );
19
+ old = (size_t )address & 2 ? (old & 0xffff ) | (hsum .x << 16 ) : (old & 0xffff0000 ) | hsum .x ;
20
+ old = atomicCAS (address_as_ui , assumed , old );
21
+ }
22
+ while (assumed != old );
23
+ }
24
+
25
+ #ifdef __CUDA_ARCH__
26
+ #if __CUDA_ARCH__ < 700
27
+
28
+ __device__ __forceinline__ void atomicAdd (half * address , half val ) { atomicAdd_half (address , val ); }
29
+
30
+ #endif
31
+ #endif
32
+
33
+ #endif
Original file line number Diff line number Diff line change 2
2
#include " column_remap.h"
3
3
#include " util.h"
4
4
#include " matrix.h"
5
+ #include " cuda_compat.h"
5
6
6
7
// Block size
7
8
You can’t perform that action at this time.
0 commit comments