-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[Offload] Rework compiling device code for unit test suites #144776
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-offload Author: Joseph Huber (jhuber6) ChangesSummary: This should still create the same output as far as I'm aware. Full diff: https://github.com/llvm/llvm-project/pull/144776.diff 2 Files Affected:
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d8046..cbeae5ce72989 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,73 @@
add_custom_target(OffloadUnitTests)
set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
+function(add_offload_test_device_code test_filename test_name)
+ set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+ # Try to build with support for NVPTX devices.
+ if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+ find_package(CUDAToolkit QUIET)
+ if(CUDAToolkit_FOUND)
+ get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+ endif()
+ check_cxx_compiler_flag(
+ "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+ if(PLATFORM_HAS_NVPTX)
+ set(nvptx_arch "native")
+ elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+ set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+ endif()
+
+ if(nvptx_arch AND CUDAToolkit_FOUND)
+ set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND ${CMAKE_C_COMPILER}
+ --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+ -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+ -c ${SRC_PATH} -o ${output_file}
+ DEPENDS ${SRC_PATH}
+ )
+ add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+ endif()
+ endif()
+
+ # Try to build with support for AMDGPU devices.
+ if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+ check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+ if(PLATFORM_HAS_AMDGPU)
+ set(amdgpu_arch "native")
+ elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+ set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+ endif()
+
+ if(amdgpu_arch)
+ set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND ${CMAKE_C_COMPILER}
+ --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+ -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+ -c ${SRC_PATH} -o ${output_file}
+ DEPENDS ${SRC_PATH}
+ )
+ add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+ endif()
+ endif()
+
+ # Create a single dependency target for the device code.
+ add_custom_target(${test_name})
+ if(TARGET ${test_name}.amdgpu)
+ add_dependencies(${test_name} ${test_name}.amdgpu)
+ endif()
+ if(TARGET ${test_name}.nvptx64)
+ add_dependencies(${test_name} ${test_name}.nvptx64)
+ endif()
+endfunction()
+
function(add_offload_unittest test_dirname)
set(target_name "${test_dirname}.unittests")
@@ -9,10 +76,15 @@ function(add_offload_unittest test_dirname)
add_unittest(OffloadUnitTests "${target_name}"
${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
${files})
- add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+ add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
endfunction()
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+ "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+ "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6e..c475bc6f314dd 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,6 @@
-macro(add_offload_test_device_code test_filename test_name)
- set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
- # Build for NVPTX
- if(OFFLOAD_TEST_TARGET_NVIDIA)
- set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
- add_custom_command(OUTPUT ${BIN_PATH}
- COMMAND
- ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
- ${ARGN}
- -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
- --cuda-path=${CUDA_ROOT}
- ${SRC_PATH} -o ${BIN_PATH}
- DEPENDS ${SRC_PATH}
- )
- list(APPEND BIN_PATHS ${BIN_PATH})
- endif()
-
- # Build for AMDGPU
- if(OFFLOAD_TEST_TARGET_AMDGPU)
- set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
- add_custom_command(OUTPUT ${BIN_PATH}
- COMMAND
- ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
- ${ARGN}
- -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
- ${SRC_PATH} -o ${BIN_PATH}
- DEPENDS ${SRC_PATH}
- )
- list(APPEND BIN_PATHS ${BIN_PATH})
- endif()
-
- # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
- "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
- "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
- get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
- set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
- set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
- set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
- set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
- set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
- list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
- set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
add_offload_test_device_code(foo.c foo)
add_offload_test_device_code(bar.c bar)
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
add_offload_test_device_code(noargs.c noargs -O3)
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo bar noargs)
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
|
4bc1aa3
to
447f1dc
Compare
Summary: I'll probably want to use this as a more generic utility in the future. This patch reworks it to make it a top level function. I also tried to decouple this from the OpenMP utilities to make that easier in the future. Instead, I just use `-march=native` functionality which is the same thing. Needed a small hack to skip the linker stage for checking if that works. This should still create the same output as far as I'm aware.
447f1dc
to
fffb54b
Compare
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}") | ||
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON}) | ||
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE}) | ||
endfunction() | ||
|
||
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING | ||
"Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61") | ||
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this also take a list of arches here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No
offload/unittests/CMakeLists.txt
Outdated
if(PLATFORM_HAS_AMDGPU) | ||
set(amdgpu_arch "native") | ||
elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH) | ||
set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}") | ||
endif() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this mean that on a platform that has an AMDGPU, the arch cannot be forcefully overwritten?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, I should reverse these.
offload/unittests/CMakeLists.txt
Outdated
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}") | ||
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON}) | ||
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE}) | ||
endfunction() | ||
|
||
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd call it NVPTX_ARCH
since we use AMDGPU
instead of AMD
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was existing so I didn't modify it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, this is really bad…
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can modify it in a follow up, I wanted this to mostly be NFC
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Figured that could be a follow up but I guess I can rename it here
Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use
-march=native
functionality which is thesame thing. Needed a small hack to skip the linker stage for checking if
that works.
This should still create the same output as far as I'm aware.