[Offload] Rework compiling device code for unit test suites #144776

jhuber6 · 2025-06-18T18:32:57Z

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use -march=native functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.

jhuber6 · 2025-06-18T18:33:12Z

@callumfare @RossBrunton

llvmbot · 2025-06-18T18:33:28Z

@llvm/pr-subscribers-offload

Author: Joseph Huber (jhuber6)

Changes

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use -march=native functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.

Full diff: https://github.com/llvm/llvm-project/pull/144776.diff

2 Files Affected:

(modified) offload/unittests/CMakeLists.txt (+73-1)
(modified) offload/unittests/OffloadAPI/device_code/CMakeLists.txt (+1-67)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d8046..cbeae5ce72989 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,73 @@
 add_custom_target(OffloadUnitTests)
 set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
 
+function(add_offload_test_device_code test_filename test_name)
+  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+  # Try to build with support for NVPTX devices.
+  if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    find_package(CUDAToolkit QUIET)
+    if(CUDAToolkit_FOUND)
+      get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+    endif()
+    check_cxx_compiler_flag(
+      "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+    if(PLATFORM_HAS_NVPTX)
+      set(nvptx_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+      set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+    endif()
+
+    if(nvptx_arch AND CUDAToolkit_FOUND)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Try to build with support for AMDGPU devices.
+  if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+    if(PLATFORM_HAS_AMDGPU)
+      set(amdgpu_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+    endif()
+
+    if(amdgpu_arch)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Create a single dependency target for the device code.
+  add_custom_target(${test_name})
+  if(TARGET ${test_name}.amdgpu)
+    add_dependencies(${test_name} ${test_name}.amdgpu)
+  endif()
+  if(TARGET ${test_name}.nvptx64)
+    add_dependencies(${test_name} ${test_name}.nvptx64)
+  endif()
+endfunction()
+
 function(add_offload_unittest test_dirname)
   set(target_name "${test_dirname}.unittests")
 
@@ -9,10 +76,15 @@ function(add_offload_unittest test_dirname)
   add_unittest(OffloadUnitTests "${target_name}"
     ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
     ${files})
-  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
   target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
   target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
   target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
 endfunction()
 
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+  "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+  "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
 add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6e..c475bc6f314dd 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,6 @@
-macro(add_offload_test_device_code test_filename test_name)
-    set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
-    # Build for NVPTX
-    if(OFFLOAD_TEST_TARGET_NVIDIA)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
-            ${ARGN}
-            -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
-            --cuda-path=${CUDA_ROOT}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # Build for AMDGPU
-    if(OFFLOAD_TEST_TARGET_AMDGPU)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
-            ${ARGN}
-            -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
-    "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
-    "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
-  get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
-    set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
-    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
-    set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
-    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-    list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
-    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
 add_offload_test_device_code(noargs.c noargs -O3)
 
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo bar noargs)
 set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)

offload/unittests/OffloadAPI/device_code/CMakeLists.txt

offload/unittests/CMakeLists.txt

offload/unittests/OffloadAPI/device_code/CMakeLists.txt

Summary: I'll probably want to use this as a more generic utility in the future. This patch reworks it to make it a top level function. I also tried to decouple this from the OpenMP utilities to make that easier in the future. Instead, I just use `-march=native` functionality which is the same thing. Needed a small hack to skip the linker stage for checking if that works. This should still create the same output as far as I'm aware.

jplehr · 2025-06-20T07:01:10Z

offload/unittests/CMakeLists.txt

  target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
  target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
  target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
 endfunction()

+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+  "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING


Does this also take a list of arches here?

jplehr · 2025-06-20T07:05:42Z

offload/unittests/CMakeLists.txt

+    if(PLATFORM_HAS_AMDGPU)
+      set(amdgpu_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+    endif()


Does this mean that on a platform that has an AMDGPU, the arch cannot be forcefully overwritten?

Good catch, I should reverse these.

shiltian · 2025-06-20T12:48:16Z

offload/unittests/CMakeLists.txt

  target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
  target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
  target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
 endfunction()

+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING


I'd call it NVPTX_ARCH since we use AMDGPU instead of AMD.

This was existing so I didn't modify it

Hmm, this is really bad…

Can modify it in a follow up, I wanted this to mostly be NFC

Figured that could be a follow up but I guess I can rename it here

jhuber6 requested review from jdoerfert, jplehr and shiltian June 18, 2025 18:32

llvmbot added the offload label Jun 18, 2025

RossBrunton reviewed Jun 19, 2025

View reviewed changes

offload/unittests/OffloadAPI/device_code/CMakeLists.txt Show resolved Hide resolved

RossBrunton reviewed Jun 19, 2025

View reviewed changes

offload/unittests/CMakeLists.txt Outdated Show resolved Hide resolved

RossBrunton reviewed Jun 19, 2025

View reviewed changes

offload/unittests/CMakeLists.txt Outdated Show resolved Hide resolved

RossBrunton reviewed Jun 19, 2025

View reviewed changes

offload/unittests/OffloadAPI/device_code/CMakeLists.txt Outdated Show resolved Hide resolved

jhuber6 force-pushed the ReworkDeviceCode branch from 4bc1aa3 to 447f1dc Compare June 19, 2025 14:34

jhuber6 force-pushed the ReworkDeviceCode branch from 447f1dc to fffb54b Compare June 19, 2025 14:40

jplehr reviewed Jun 20, 2025

View reviewed changes

fix

19c1b1a

shiltian reviewed Jun 20, 2025

View reviewed changes

rename

a651583

shiltian approved these changes Jun 20, 2025

View reviewed changes

jhuber6 merged commit 3f1de19 into llvm:main Jun 20, 2025
7 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Offload] Rework compiling device code for unit test suites #144776

[Offload] Rework compiling device code for unit test suites #144776

jhuber6 commented Jun 18, 2025

Uh oh!

jhuber6 commented Jun 18, 2025

Uh oh!

llvmbot commented Jun 18, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

jplehr Jun 20, 2025

Uh oh!

jhuber6 Jun 20, 2025

Uh oh!

jplehr Jun 20, 2025

Uh oh!

jhuber6 Jun 20, 2025

Uh oh!

shiltian Jun 20, 2025 •

edited

Loading

Uh oh!

jhuber6 Jun 20, 2025

Uh oh!

shiltian Jun 20, 2025

Uh oh!

jhuber6 Jun 20, 2025

Uh oh!

jhuber6 Jun 20, 2025

Uh oh!

Uh oh!

Uh oh!

[Offload] Rework compiling device code for unit test suites #144776

[Offload] Rework compiling device code for unit test suites #144776

Conversation

jhuber6 commented Jun 18, 2025

Uh oh!

jhuber6 commented Jun 18, 2025

Uh oh!

llvmbot commented Jun 18, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

shiltian Jun 20, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

shiltian Jun 20, 2025 •

edited

Loading