Skip to content

Commit 18d0351

Browse files
authored
Merge pull request #62 from Jorghi12/dynamic
Automatically pre-include CUDA headers just like NVCC.
2 parents 38ad0c3 + b4f6ff9 commit 18d0351

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

tools/amd_build/pyHIPIFY/hipify-python.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,34 @@ def replace_math_functions(input_string):
504504
return output_string
505505

506506

507+
def hip_header_magic(input_string):
    """Prepend the HIP runtime header to sources that use device code without it.

    NVCC provides "magic" includes, so CUDA sources can use kernel builtins
    without an explicit runtime #include. This function mimics that: if the
    text contains device-code markers and does not already include one of the
    HIP runtime headers, '#include "hip/hip_runtime.h"' is added as the first
    line.

    Args:
        input_string: Full text of the source file being processed.

    Returns:
        The input text unchanged when a HIP runtime header is already included
        or no device code is detected; otherwise the input text with the
        include line prepended.

    TODO:
        Update logic to ignore cases where the runtime header is included
        indirectly by another file.
    """
    # Bail out early if one of the following headers is already included.
    headers = ["hip/hip_runtime.h", "hip/hip_runtime_api.h"]
    for header in headers:
        # Escape the header name so '.' only matches a literal dot, and allow
        # any spacing the preprocessor accepts: '#include<x>', '#  include "x"'.
        escaped = re.escape(header)
        pattern = r'#[ \t]*include[ \t]*("{0}"|<{0}>)'.format(escaped)
        if re.search(pattern, input_string):
            return input_string

    # Rough logic to detect device code: a kernel launch, a kernel or shared
    # memory qualifier, or a call to __syncthreads().
    has_device_logic = (
        "hipLaunchKernelGGL" in input_string
        or "__global__" in input_string
        or "__shared__" in input_string
        or re.search(r"[:]?[:]?\b(__syncthreads)\b(\w*\()", input_string) is not None
    )

    # If device logic is found, provide the necessary header.
    if has_device_logic:
        return '#include "hip/hip_runtime.h"\n' + input_string

    return input_string
533+
534+
507535
def replace_extern_shared(input_string):
508536
"""Match extern __shared__ type foo[]; syntax and use HIP_DYNAMIC_SHARED() MACRO instead.
509537
https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md#__shared__
@@ -735,6 +763,9 @@ def preprocessor(filepath, stats, hipify_caffe2):
735763
# Replace __forceinline__ with inline
736764
output_source = replace_forceinline(output_source)
737765

766+
# Include header if device code is contained.
767+
output_source = hip_header_magic(output_source)
768+
738769
# Replace the extern __shared__
739770
output_source = replace_extern_shared(output_source)
740771

0 commit comments

Comments
 (0)