@@ -504,6 +504,34 @@ def replace_math_functions(input_string):
504
504
return output_string
505
505
506
506
507
def hip_header_magic(input_string):
    """Prepend the HIP runtime include to sources that contain device code.

    NVCC supplies "magic" includes, so sources may use kernel builtins
    without an explicit runtime include. To match that, this adds
    ``#include "hip/hip_runtime.h"`` when the text looks like it contains
    device code and does not already include a HIP runtime header.

    Args:
        input_string: Full text of the source file being processed.

    Returns:
        ``input_string`` unchanged when a HIP runtime header is already
        included or when no device-code marker is found; otherwise the text
        with the include line prepended.

    TODO:
        Update logic to ignore cases where the runtime header is included
        by another file.
    """
    # Headers whose presence means nothing needs to be added.
    headers = ["hip/hip_runtime.h", "hip/hip_runtime_api.h"]
    for header in headers:
        # Escape the header name so "." only matches a literal dot, and
        # tolerate optional blanks around "include" ("#include<x>",
        # "#  include  <x>") so existing includes are not duplicated.
        pattern = r'#[ \t]*include[ \t]*("{0}"|<{0}>)'.format(re.escape(header))
        if re.search(pattern, input_string):
            return input_string

    # Rough heuristic: any of these markers suggests device code.
    has_device_logic = (
        "hipLaunchKernelGGL" in input_string
        or "__global__" in input_string
        or "__shared__" in input_string
        or re.search(r"[:]?[:]?\b(__syncthreads)\b(\w*\()", input_string) is not None
    )
    if not has_device_logic:
        return input_string

    # The include goes on its own line; the original text follows unmodified.
    return '#include "hip/hip_runtime.h"\n' + input_string
533
+
534
+
507
535
def replace_extern_shared (input_string ):
508
536
"""Match extern __shared__ type foo[]; syntax and use HIP_DYNAMIC_SHARED() MACRO instead.
509
537
https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md#__shared__
@@ -735,6 +763,9 @@ def preprocessor(filepath, stats, hipify_caffe2):
735
763
# Replace __forceinline__ with inline
736
764
output_source = replace_forceinline (output_source )
737
765
766
+ # Include header if device code is contained.
767
+ output_source = hip_header_magic (output_source )
768
+
738
769
# Replace the extern __shared__
739
770
output_source = replace_extern_shared (output_source )
740
771
0 commit comments