Skip to content

Commit 99cfff4

Browse files
committed
Remove __host__ annotation when compiling for NVRTC
1 parent 9b41485 commit 99cfff4

File tree

2 files changed: 16 additions and 6 deletions

2 files changed: 16 additions and 6 deletions

include/kernel_float/macros.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,16 @@
1010
#define KERNEL_FLOAT_IS_CUDA (1)
1111
#define KERNEL_FLOAT_DEVICE __forceinline__ __device__
1212

13-
#ifdef __CUDA_ARCH__
13+
// NVRTC cannot deal with __host__ annotations
14+
#ifdef __CUDACC_RTC__
1415
#define KERNEL_FLOAT_INLINE __forceinline__ __device__
16+
#else
17+
#define KERNEL_FLOAT_INLINE __forceinline__ __host__ __device__
18+
#endif
19+
20+
#ifdef __CUDA_ARCH__
1521
#define KERNEL_FLOAT_IS_DEVICE (1)
1622
#else // __CUDA_ARCH__
17-
#define KERNEL_FLOAT_INLINE __forceinline__ __host__
1823
#define KERNEL_FLOAT_IS_HOST (1)
1924
#endif // __CUDA_ARCH__
2025
#elif defined(__HIPCC__)

single_include/kernel_float.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
//================================================================================
1818
// this file has been auto-generated, do not modify its contents!
19-
// date: 2025-09-15 12:39:02.709972
20-
// git hash: 81efb0fbfcc587343ba798590ba1541babe378d6
19+
// date: 2025-09-15 12:44:05.768243
20+
// git hash: 9b41485a27b669ea6f4aae118b4d251947608bf6
2121
//================================================================================
2222

2323
#ifndef KERNEL_FLOAT_MACROS_H
@@ -32,11 +32,16 @@
3232
#define KERNEL_FLOAT_IS_CUDA (1)
3333
#define KERNEL_FLOAT_DEVICE __forceinline__ __device__
3434

35-
#ifdef __CUDA_ARCH__
35+
// NVRTC cannot deal with __host__ annotations
36+
#ifdef __CUDACC_RTC__
3637
#define KERNEL_FLOAT_INLINE __forceinline__ __device__
38+
#else
39+
#define KERNEL_FLOAT_INLINE __forceinline__ __host__ __device__
40+
#endif
41+
42+
#ifdef __CUDA_ARCH__
3743
#define KERNEL_FLOAT_IS_DEVICE (1)
3844
#else // __CUDA_ARCH__
39-
#define KERNEL_FLOAT_INLINE __forceinline__ __host__
4045
#define KERNEL_FLOAT_IS_HOST (1)
4146
#endif // __CUDA_ARCH__
4247
#elif defined(__HIPCC__)

0 commit comments

Comments (0)