diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp new file mode 100644 index 0000000000000..f258156d7bb99 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp @@ -0,0 +1,83 @@ +//==---- syclbin_kernel_bundle.hpp - SYCLBIN-based kernel_bundle tooling ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include +#include + +#if __has_include() +#include +#endif + +#if __has_include() +#include +#endif + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::vector &Devs, + const sycl::span &Bytes, PropertyListT = {}) { + std::vector UniqueDevices = + sycl::detail::removeDuplicateDevices(Devs); + + sycl::detail::KernelBundleImplPtr Impl = + sycl::detail::get_kernel_bundle_impl(Ctxt, UniqueDevices, Bytes, State); + return sycl::detail::createSyclObjFromImpl>(Impl); +} + +#if __cpp_lib_span +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::vector &Devs, + const std::span &Bytes, PropertyListT Props = {}) { + return experimental::get_kernel_bundle( + Ctxt, Devs, sycl::span(Bytes.data(), Bytes.size()), Props); +} +#endif + +#if __cpp_lib_filesystem +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::vector &Devs, + const std::filesystem::path &Filename, + PropertyListT Props = {}) { + std::vector RawSYCLBINData; + { + std::ifstream FileStream{Filename, std::ios::binary}; + if (!FileStream.is_open()) + throw sycl::exception(make_error_code(errc::invalid), + "Failed to open SYCLBIN file: " + + Filename.string()); + RawSYCLBINData = + std::vector{std::istreambuf_iterator(FileStream), + std::istreambuf_iterator()}; + } + return experimental::get_kernel_bundle( + Ctxt, Devs, sycl::span{RawSYCLBINData}, Props); +} + +template +std::enable_if_t> +get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename, + PropertyListT Props = {}) { + return experimental::get_kernel_bundle(Ctxt, Ctxt.get_devices(), + Filename, Props); +} +#endif + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 803504d21f585..eece0db7e17bc 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -19,7 +19,8 @@ #include // for kernel, kernel_bundle #include // for bundle_state #include // for property_list -#include // for ur_native_handle_t +#include +#include #include #include // PropertyT @@ -639,6 +640,10 @@ __SYCL_EXPORT detail::KernelBundleImplPtr get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, bundle_state State); +__SYCL_EXPORT detail::KernelBundleImplPtr +get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, + const sycl::span &Bytes, bundle_state State); + __SYCL_EXPORT const std::vector removeDuplicateDevices(const std::vector &Devs); diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index ada8a1c72ebfd..8e3d017af2cc3 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index f2e5494fb6218..3201af58d9d3f 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -299,6 +299,7 @@ set(SYCL_COMMON_SOURCES "detail/reduction.cpp" "detail/sampler_impl.cpp" "detail/stream_impl.cpp" + "detail/syclbin.cpp" "detail/scheduler/commands.cpp" "detail/scheduler/leaves_collection.cpp" "detail/scheduler/scheduler.cpp" diff --git a/sycl/source/detail/base64.hpp b/sycl/source/detail/base64.hpp new file mode 100644 index 0000000000000..301cb7ee37716 --- /dev/null +++ b/sycl/source/detail/base64.hpp @@ -0,0 +1,121 @@ +//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/include/llvm/Support/Base64.h. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMSupport. + +#pragma once + +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +class Base64 { +private: + // Decode a single character. + static inline int decode(char Ch) { + if (Ch >= 'A' && Ch <= 'Z') // 0..25 + return Ch - 'A'; + else if (Ch >= 'a' && Ch <= 'z') // 26..51 + return Ch - 'a' + 26; + else if (Ch >= '0' && Ch <= '9') // 52..61 + return Ch - '0' + 52; + else if (Ch == '+') // 62 + return 62; + else if (Ch == '/') // 63 + return 63; + return -1; + } + + // Decode a quadruple of characters. + static inline void decode4(const char *Src, byte *Dst) { + int BadCh = -1; + + for (auto I = 0; I < 4; ++I) { + char Ch = Src[I]; + int Byte = decode(Ch); + + if (Byte < 0) { + BadCh = Ch; + break; + } + Dst[I] = (byte)Byte; + } + if (BadCh != -1) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid char in base 64 encoding."); + } + +public: + using byte = uint8_t; + + // Get the size of the encoded byte sequence of given size. + static size_t getDecodedSize(size_t SrcSize) { return (SrcSize * 3 + 3) / 4; } + + // Decode a sequence of given size into a pre-allocated memory. + // Returns the number of bytes in the decoded result or 0 in case of error. + static size_t decode(const char *Src, byte *Dst, size_t SrcSize) { + size_t SrcOff = 0; + size_t DstOff = 0; + + // decode full quads + for (size_t Qch = 0; Qch < SrcSize / 4; ++Qch, SrcOff += 4, DstOff += 3) { + byte Ch[4] = {0, 0, 0, 0}; + decode4(Src + SrcOff, Ch); + + // each quad of chars produces three bytes of output + Dst[DstOff + 0] = Ch[0] | (Ch[1] << 6); + Dst[DstOff + 1] = (Ch[1] >> 2) | (Ch[2] << 4); + Dst[DstOff + 2] = (Ch[2] >> 4) | (Ch[3] << 2); + } + auto RemChars = SrcSize - SrcOff; + + if (RemChars == 0) + return DstOff; + // decode the remainder; variants: + // 2 chars remain - produces single byte + // 3 chars remain - produces two bytes + + if (RemChars != 2 && RemChars != 3) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid encoded sequence length."); + + int Ch0 = decode(Src[SrcOff++]); + int Ch1 = decode(Src[SrcOff++]); + int Ch2 = RemChars == 3 ? decode(Src[SrcOff]) : 0; + + if (Ch0 < 0 || Ch1 < 0 || Ch2 < 0) + throw sycl::exception( + make_error_code(errc::invalid), + "Invalid characters in the encoded sequence remainder."); + Dst[DstOff++] = Ch0 | (Ch1 << 6); + + if (RemChars == 3) + Dst[DstOff++] = (Ch1 >> 2) | (Ch2 << 4); + return DstOff; + } + + // Allocate minimum required amount of memory and decode a sequence of given + // size into it. + // Returns the decoded result. The size can be obtained via getDecodedSize. + static std::unique_ptr decode(const char *Src, size_t SrcSize) { + size_t DstSize = getDecodedSize(SrcSize); + std::unique_ptr Dst(new byte[DstSize]); + decode(Src, Dst.get(), SrcSize); + return Dst; + } +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 5201a9dfb1e9c..192d6054d53be 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -162,7 +162,7 @@ RTDeviceBinaryImage::getProperty(const char *PropName) const { return *It; } -void RTDeviceBinaryImage::init(sycl_device_binary Bin) { +RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { ImageId = ImageCounter++; // If there was no binary, we let the owner handle initialization as they see @@ -227,12 +227,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; } -DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( - std::unique_ptr> &&DataPtr, - size_t DataSize) - : DynRTDeviceBinaryImage() { - Data = std::move(DataPtr); - Bin->BinaryStart = reinterpret_cast(Data.get()); +std::unique_ptr CreateDefaultDynBinary( + const std::unique_ptr> &DataPtr, + size_t DataSize) { + auto Bin = std::make_unique(); + Bin->BinaryStart = reinterpret_cast(DataPtr.get()); Bin->BinaryEnd = Bin->BinaryStart + DataSize; Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize); switch (Bin->Format) { @@ -242,9 +241,15 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( default: Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; } - init(Bin); + return Bin; } +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( + std::unique_ptr> &&DataPtr, + size_t DataSize) + : RTDeviceBinaryImage(CreateDefaultDynBinary(DataPtr, DataSize).release()), + Data{std::move(DataPtr)} {} + DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { delete Bin; Bin = nullptr; @@ -479,8 +484,6 @@ static void copyProperty(sycl_device_binary_property &NextFreeProperty, DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( const std::vector &Imgs) : DynRTDeviceBinaryImage() { - init(nullptr); - // Naive merges. auto MergedSpecConstants = naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { @@ -675,18 +678,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( #ifdef SYCL_RT_ZSTD_AVAILABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) - : RTDeviceBinaryImage() { - - // 'CompressedBin' is part of the executable image loaded into memory - // which can't be modified easily. So, we need to make a copy of it. - Bin = new sycl_device_binary_struct(*CompressedBin); - + : RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) { // Get the decompressed size of the binary image. m_ImageSize = ZSTDCompressor::GetDecompressedSize( reinterpret_cast(Bin->BinaryStart), static_cast(Bin->BinaryEnd - Bin->BinaryStart)); - - init(Bin); } void CompressedRTDeviceBinaryImage::Decompress() { diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 872cad84ced27..6a0103be7b873 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -140,7 +140,7 @@ class RTDeviceBinaryImage { public: RTDeviceBinaryImage() : Bin(nullptr) {} - RTDeviceBinaryImage(sycl_device_binary Bin) { init(Bin); } + RTDeviceBinaryImage(sycl_device_binary Bin); // Explicitly delete copy constructor/operator= to avoid unintentional copies RTDeviceBinaryImage(const RTDeviceBinaryImage &) = delete; RTDeviceBinaryImage &operator=(const RTDeviceBinaryImage &) = delete; @@ -247,8 +247,6 @@ class RTDeviceBinaryImage { } protected: - void init(); - void init(sycl_device_binary Bin); sycl_device_binary get() const { return Bin; } sycl_device_binary Bin; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 83af5b246683a..50dc29f02a4c3 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ using include_pairs_t = constexpr uint8_t ImageOriginSYCLOffline = 1; constexpr uint8_t ImageOriginInterop = 1 << 1; constexpr uint8_t ImageOriginKernelCompiler = 1 << 2; +constexpr uint8_t ImageOriginSYCLBIN = 1 << 3; // Helper class to track and unregister shared SYCL device_globals. class ManagedDeviceGlobalsRegistry { @@ -160,16 +162,11 @@ struct KernelCompilerBinaryInfo { include_pairs_t &&IncludePairsVec) : MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {} - KernelCompilerBinaryInfo(syclex::source_language Lang, - KernelNameSetT &&KernelNames) - : MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {} - KernelCompilerBinaryInfo( - syclex::source_language Lang, KernelNameSetT &&KernelNames, - MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix, + syclex::source_language Lang, MangledKernelNameMapT &&MangledKernelNames, + std::string &&Prefix, std::shared_ptr &&DeviceGlobalRegistry) - : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, - MMangledKernelNames{std::move(MangledKernelNames)}, + : MLanguage{Lang}, MMangledKernelNames{std::move(MangledKernelNames)}, MPrefixes{std::move(Prefix)}, MDeviceGlobalRegistries{std::move(DeviceGlobalRegistry)} {} @@ -193,9 +190,6 @@ struct KernelCompilerBinaryInfo { "Linking binaries with different source " "languages is not currently supported."); - for (const std::string &KernelName : RTCInfo->MKernelNames) - Result->MKernelNames.insert(KernelName); - Result->MMangledKernelNames.insert(RTCInfo->MMangledKernelNames.begin(), RTCInfo->MMangledKernelNames.end()); @@ -226,7 +220,6 @@ struct KernelCompilerBinaryInfo { } syclex::source_language MLanguage; - KernelNameSetT MKernelNames; MangledKernelNameMapT MMangledKernelNames; std::set MPrefixes; include_pairs_t MIncludePairs; @@ -272,10 +265,12 @@ class device_image_impl { ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, const std::vector &SpecConstsBlob, uint8_t Origins, std::optional &&RTCInfo, + KernelNameSetT &&KernelNames, std::unique_ptr &&MergedImageStorage = nullptr) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), - MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob), + MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, + MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MSpecConstSymMap(SpecConstMap), MOrigins(Origins), MRTCBinInfo(std::move(RTCInfo)), @@ -288,9 +283,10 @@ class device_image_impl { : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::make_shared>()), + MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), - MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) { + MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) { updateSpecConstSymMap(); } @@ -303,12 +299,12 @@ class device_image_impl { std::shared_ptr &&DeviceGlobalRegistry) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(nullptr), - MKernelIDs(std::move(KernelIDs)), + MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), MRTCBinInfo(KernelCompilerBinaryInfo{ - Lang, std::move(KernelNames), std::move(MangledKernelNames), - std::move(Prefix), std::move(DeviceGlobalRegistry)}) { + Lang, std::move(MangledKernelNames), std::move(Prefix), + std::move(DeviceGlobalRegistry)}) { updateSpecConstSymMap(); } @@ -347,9 +343,10 @@ class device_image_impl { MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::make_shared>()), + MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), - MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) {} + MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) {} bool has_kernel(const kernel_id &KernelIDCand) const noexcept { return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(), @@ -599,6 +596,14 @@ class device_image_impl { } std::string adjustKernelName(std::string_view Name) const { + if (MOrigins & ImageOriginSYCLBIN) { + constexpr std::string_view KernelPrefix = "__sycl_kernel_"; + if (Name.size() > KernelPrefix.size() && + Name.substr(0, KernelPrefix.size()) == KernelPrefix) + return Name.data(); + return std::string{KernelPrefix} + Name.data(); + } + if (!MRTCBinInfo.has_value()) return Name.data(); @@ -611,22 +616,24 @@ class device_image_impl { return Name.data(); } - bool hasKernelName(const std::string &Name) const { - return MRTCBinInfo.has_value() && !Name.empty() && - MRTCBinInfo->MKernelNames.find(adjustKernelName(Name)) != - MRTCBinInfo->MKernelNames.end(); + bool hasKernelName(std::string_view Name) const { + return (getOriginMask() & + (ImageOriginKernelCompiler | ImageOriginSYCLBIN)) && + !Name.empty() && + MKernelNames.find(adjustKernelName(Name)) != MKernelNames.end(); } - std::shared_ptr tryGetSourceBasedKernel( - std::string_view Name, const context &Context, - const std::shared_ptr &OwnerBundle, - const std::shared_ptr &Self) const { - if (!(getOriginMask() & ImageOriginKernelCompiler)) + std::shared_ptr + tryGetExtensionKernel(std::string_view Name, const context &Context, + const std::shared_ptr &OwnerBundle, + const std::shared_ptr &Self) const { + if (!(getOriginMask() & ImageOriginKernelCompiler) && + !((getOriginMask() & ImageOriginSYCLBIN) && hasKernelName(Name))) return nullptr; - assert(MRTCBinInfo); std::string AdjustedName = adjustKernelName(Name); - if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { + if (MRTCBinInfo && + MRTCBinInfo->MLanguage == syclex::source_language::sycl) { auto &PM = ProgramManager::getInstance(); for (const std::string &Prefix : MRTCBinInfo->MPrefixes) { auto KID = PM.tryGetSYCLKernelID(Prefix + AdjustedName); @@ -684,6 +691,8 @@ class device_image_impl { return MRTCBinInfo; } + const KernelNameSetT &getKernelNames() const noexcept { return MKernelNames; } + bool isNonSYCLSourceBased() const noexcept { return (getOriginMask() & ImageOriginKernelCompiler) && !isFromSourceLanguage(syclex::source_language::sycl); @@ -763,7 +772,7 @@ class device_image_impl { nullptr); std::vector KernelNames = - getKernelNamesFromURProgram(Adapter, UrProgram); + ProgramManager::getKernelNamesFromURProgram(Adapter, UrProgram); KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()}; // If caching enabled and kernel not fetched from cache, cache. @@ -1246,24 +1255,8 @@ class device_image_impl { return UrProgram; } - static std::vector - getKernelNamesFromURProgram(const AdapterPtr &Adapter, - ur_program_handle_t UrProgram) { - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); - return detail::split_string(KernelNamesStr, ';'); - } - const std::variant, - const RTDeviceBinaryImage *> + const RTDeviceBinaryImage *, SYCLBINBinaries> MBinImage = static_cast(nullptr); context MContext; std::vector MDevices; @@ -1275,6 +1268,9 @@ class device_image_impl { // according to LessByNameComp std::shared_ptr> MKernelIDs; + // List of known kernel names. + KernelNameSetT MKernelNames; + // A mutex for sycnhronizing access to spec constants blob. Mutable because // needs to be locked in the const method for getting spec constant value. mutable std::mutex MSpecConstAccessMtx; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index ec7ff702ee8ac..1875226086f6b 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -148,6 +149,10 @@ class kernel_bundle_impl { "Not all devices are in the set of associated " "devices for input bundle or vector of devices is empty"); + // Copy SYCLBINs to ensure lifetime is preserved by the executable bundle. + MSYCLBINs.insert(MSYCLBINs.end(), InputBundleImpl->MSYCLBINs.begin(), + InputBundleImpl->MSYCLBINs.end()); + for (const DevImgPlainWithDeps &DevImgWithDeps : InputBundleImpl->MDeviceImages) { // Skip images which are not compatible with devices provided @@ -275,17 +280,13 @@ class kernel_bundle_impl { std::set> SeenKernelNames; std::set> Conflicts; for (const device_image_plain &DevImage : DevImages) { - const std::optional &RTCInfo = - getSyclObjImpl(DevImage)->getRTCInfo(); - if (!RTCInfo.has_value()) - continue; + const KernelNameSetT &KernelNames = + getSyclObjImpl(DevImage)->getKernelNames(); std::vector Intersect; std::set_intersection(SeenKernelNames.begin(), SeenKernelNames.end(), - RTCInfo->MKernelNames.begin(), - RTCInfo->MKernelNames.end(), + KernelNames.begin(), KernelNames.end(), std::inserter(Conflicts, Conflicts.begin())); - SeenKernelNames.insert(RTCInfo->MKernelNames.begin(), - RTCInfo->MKernelNames.end()); + SeenKernelNames.insert(KernelNames.begin(), KernelNames.end()); } if (!Conflicts.empty()) { @@ -455,12 +456,27 @@ class kernel_bundle_impl { "Not all input bundles have the same set of associated devices."); } + // Pre-count and reserve space in vectors. + { + size_t NumDevImgs = 0, NumSharedDevBins = 0, NumSYCLBINs = 0; + for (const detail::KernelBundleImplPtr &Bundle : Bundles) { + NumDevImgs += Bundle->MDeviceImages.size(); + NumSharedDevBins += Bundle->MSharedDeviceBinaries.size(); + NumSYCLBINs += Bundle->MSYCLBINs.size(); + } + MDeviceImages.reserve(NumDevImgs); + MSharedDeviceBinaries.reserve(NumSharedDevBins); + MSYCLBINs.reserve(NumSYCLBINs); + } + for (const detail::KernelBundleImplPtr &Bundle : Bundles) { MDeviceImages.insert(MDeviceImages.end(), Bundle->MDeviceImages.begin(), Bundle->MDeviceImages.end()); MSharedDeviceBinaries.insert(MSharedDeviceBinaries.end(), Bundle->MSharedDeviceBinaries.begin(), Bundle->MSharedDeviceBinaries.end()); + MSYCLBINs.insert(MSYCLBINs.end(), Bundle->MSYCLBINs.begin(), + Bundle->MSYCLBINs.end()); } fillUniqueDeviceImages(); @@ -540,6 +556,32 @@ class kernel_bundle_impl { MDeviceImages.emplace_back(DevImg); } + // SYCLBIN constructor + kernel_bundle_impl(const context &Context, const std::vector &Devs, + const sycl::span &Bytes, bundle_state State) + : MContext(Context), MDevices(Devs), MState(State) { + common_ctor_checks(); + + auto &SYCLBIN = MSYCLBINs.emplace_back( + std::make_shared(Bytes.data(), Bytes.size())); + + if (SYCLBIN->getState() != static_cast(State)) + throw sycl::exception( + make_error_code(errc::invalid), + "kernel_bundle state does not match the state of the SYCLBIN file."); + + std::vector BestImages = + SYCLBIN->getBestCompatibleImages(Devs); + MDeviceImages.reserve(BestImages.size()); + for (const detail::RTDeviceBinaryImage *Image : BestImages) + MDeviceImages.emplace_back(std::make_shared( + Image, Context, Devs, ProgramManager::getBinImageState(Image), + /*KernelIDs=*/nullptr, /*URProgram=*/nullptr, ImageOriginSYCLBIN)); + ProgramManager::getInstance().bringSYCLDeviceImagesToState(MDeviceImages, + State); + fillUniqueDeviceImages(); + } + std::shared_ptr build_from_source( const std::vector Devices, const std::vector &BuildOptions, @@ -600,9 +642,10 @@ class kernel_bundle_impl { kernel ext_oneapi_get_kernel(const std::string &Name, const std::shared_ptr &Self) const { - if (!hasSourceBasedImages()) + if (!hasSourceBasedImages() && !hasSYCLBINImages()) throw sycl::exception(make_error_code(errc::invalid), "'ext_oneapi_get_kernel' is only available in " + "kernel_bundles created from SYCLBIN files and " "kernel_bundles successfully built from " "kernel_bundle."); @@ -615,8 +658,8 @@ class kernel_bundle_impl { const std::shared_ptr &DevImgImpl = getSyclObjImpl(DevImg); if (std::shared_ptr PotentialKernelImpl = - DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, - DevImgImpl)) + DevImgImpl->tryGetExtensionKernel(Name, MContext, Self, + DevImgImpl)) return detail::createSyclObjFromImpl( std::move(PotentialKernelImpl)); } @@ -625,12 +668,12 @@ class kernel_bundle_impl { } std::string ext_oneapi_get_raw_kernel_name(const std::string &Name) { - if (!hasSourceBasedImages()) - throw sycl::exception( - make_error_code(errc::invalid), - "'ext_oneapi_get_raw_kernel_name' is only available in " - "kernel_bundles successfully built from " - "kernel_bundle."); + if (!hasSourceBasedImages() && !hasSYCLBINImages()) + throw sycl::exception(make_error_code(errc::invalid), + "'ext_oneapi_get_raw_kernel_name' is only " + "available in kernel_bundles created from SYCLBIN " + "files and kernel_bundles successfully built from " + "kernel_bundle."); auto It = std::find_if(begin(), end(), [&Name](const device_image_plain &DevImg) { @@ -863,6 +906,12 @@ class kernel_bundle_impl { }); } + bool hasSYCLBINImages() const noexcept { + return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & ImageOriginSYCLBIN; + }); + } + bool hasSYCLOfflineImages() const noexcept { return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { return getSyclObjImpl(DevImg)->getOriginMask() & ImageOriginSYCLOffline; @@ -952,8 +1001,8 @@ class kernel_bundle_impl { const std::shared_ptr &DevImgImpl = getSyclObjImpl(DevImg); if (std::shared_ptr SourceBasedKernel = - DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, - DevImgImpl)) + DevImgImpl->tryGetExtensionKernel(Name, MContext, Self, + DevImgImpl)) return SourceBasedKernel; } @@ -1013,6 +1062,11 @@ class kernel_bundle_impl { // device globals prior to unregistering the binaries. std::vector> MSharedDeviceBinaries; + // SYCLBINs manage their own binary information, so if we have any we store + // them. These are stored as shared_ptr to ensure they stay alive across + // kernel_bundles that use them. + std::vector> MSYCLBINs; + std::vector MDeviceImages; std::vector MUniqueDeviceImages; // This map stores values for specialization constants, that are missing diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 86738958bb0f5..3bd0d50a9302a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2394,7 +2394,8 @@ ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, return nullptr; } -static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { +bundle_state +ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) { auto IsAOTBinary = [](const char *Format) { return ((strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) || (strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) || @@ -2414,6 +2415,22 @@ static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { : sycl::bundle_state::object; } +std::vector +ProgramManager::getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram) { + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + return detail::split_string(KernelNamesStr, ';'); +} + std::optional ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); @@ -2941,6 +2958,8 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, Prog, Adapter); + KernelNameSetT KernelNames = InputImpl->getKernelNames(); + std::optional RTCInfo = InputImpl->getRTCInfo(); DeviceImageImplPtr ObjectImpl = std::make_shared( @@ -2949,7 +2968,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref(), InputImpl->getOriginMask(), - std::move(RTCInfo)); + std::move(RTCInfo), std::move(KernelNames)); std::string CompileOptions; applyCompileOptionsFromEnvironment(CompileOptions); @@ -3133,10 +3152,20 @@ ProgramManager::link(const std::vector &Imgs, std::vector *> RTCInfoPtrs; RTCInfoPtrs.reserve(Imgs.size()); + KernelNameSetT MergedKernelNames; for (const device_image_plain &DevImg : Imgs) { const DeviceImageImplPtr &DevImgImpl = getSyclObjImpl(DevImg); CombinedOrigins |= DevImgImpl->getOriginMask(); RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); + MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), + DevImgImpl->getKernelNames().end()); + if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { + // SYCLBIN binaries should gather their kernels from the backend. + std::vector GatheredKernelNames = + getKernelNamesFromURProgram(Adapter, LinkedProg); + MergedKernelNames.insert(GatheredKernelNames.begin(), + GatheredKernelNames.end()); + } } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); @@ -3146,7 +3175,7 @@ ProgramManager::link(const std::vector &Imgs, bundle_state::executable, std::move(KernelIDs), LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), - std::move(MergedImageStorage)); + std::move(MergedKernelNames), std::move(MergedImageStorage)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3202,6 +3231,9 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, SpecConstMap = MainInputImpl->get_spec_const_data_ref(); } + ur_program_handle_t ResProgram = getBuiltURProgram( + std::move(BinImgs), ContextImpl, Devs, &DevImgWithDeps, SpecConstBlob); + // The origin becomes the combination of all the origins. uint8_t CombinedOrigins = 0; for (const device_image_plain &DevImg : DevImgWithDeps) @@ -3210,18 +3242,28 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, std::vector *> RTCInfoPtrs; RTCInfoPtrs.reserve(DevImgWithDeps.size()); - for (const device_image_plain &DevImg : DevImgWithDeps) - RTCInfoPtrs.emplace_back(&(getSyclObjImpl(DevImg)->getRTCInfo())); + KernelNameSetT MergedKernelNames; + for (const device_image_plain &DevImg : DevImgWithDeps) { + const auto &DevImgImpl = getSyclObjImpl(DevImg); + RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); + MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), + DevImgImpl->getKernelNames().end()); + if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) { + // SYCLBIN binaries should gather their kernels from the backend. + std::vector GatheredKernelNames = + getKernelNamesFromURProgram(ContextImpl.getAdapter(), ResProgram); + MergedKernelNames.insert(GatheredKernelNames.begin(), + GatheredKernelNames.end()); + } + } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); - ur_program_handle_t ResProgram = getBuiltURProgram( - std::move(BinImgs), ContextImpl, Devs, &DevImgWithDeps, SpecConstBlob); - DeviceImageImplPtr ExecImpl = std::make_shared( ResultBinImg, Context, std::vector{Devs}, bundle_state::executable, std::move(KernelIDs), ResProgram, std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, - std::move(MergedRTCInfo), std::move(MergedImageStorage)); + std::move(MergedRTCInfo), std::move(MergedKernelNames), + std::move(MergedImageStorage)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 7d0d7d01b86c8..6c753278a571e 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -387,6 +387,12 @@ class ProgramManager { const device &Dev, bool ErrorOnUnresolvableImport); + static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage); + + static std::vector + getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram); + private: ProgramManager(ProgramManager const &) = delete; ProgramManager &operator=(ProgramManager const &) = delete; diff --git a/sycl/source/detail/property_set_io.hpp b/sycl/source/detail/property_set_io.hpp new file mode 100644 index 0000000000000..860c1ef27f50b --- /dev/null +++ b/sycl/source/detail/property_set_io.hpp @@ -0,0 +1,367 @@ +//==-- PropertySetIO.h -- models a sequence of property sets and their I/O -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/include/llvm/Support/PropertySetIO.h. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMSupport. + +#pragma once + +#include "detail/base64.hpp" +#include "sycl/exception.hpp" + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// Helper function for converting a string_view to an integer. Allows only +// integer values and the empty string (interpreted as 0). +template +static IntT stringViewToInt(const std::string_view &SV) { + static_assert(std::is_integral_v); + + IntT Result = static_cast(0); + if (SV.empty()) + return Result; + + bool Negate = std::is_signed_v && SV[0] == '-'; + + for (size_t I = static_cast(Negate); I < SV.size(); ++I) { + const char CurrentC = SV[I]; + if (CurrentC < '0' || CurrentC > '9') + throw sycl::exception(make_error_code(errc::invalid), + "Invalid integer numeral: " + + std::string{CurrentC}); + Result *= static_cast(10); + Result += static_cast(CurrentC - '0'); + } + return Negate ? -Result : Result; +} + +// Represents a property value. PropertyValue name is stored in the encompassing +// container. +class PropertyValue { +public: + // Type of the size of the value. Value size gets serialized along with the + // value data in some cases for later reading at runtime, so size_t is not + // suitable as its size varies. + using SizeTy = uint64_t; + using byte = uint8_t; + + // Defines supported property types + enum Type { first = 0, NONE = first, UINT32, BYTE_ARRAY, last = BYTE_ARRAY }; + + // Translates C++ type to the corresponding type tag. + template static Type getTypeTag() { + static_assert(std::is_same_v || std::is_same_v); + if constexpr (std::is_same_v) { + return UINT32; + } else { + return BYTE_ARRAY; + } + } + + // Casts from int value to a type tag. + static Type getTypeTag(int T) { + if (T < first || T > last) + throw sycl::exception(make_error_code(errc::invalid), + "Bad property type."); + return static_cast(T); + } + + ~PropertyValue() { + if ((getType() == BYTE_ARRAY) && Val.ByteArrayVal) + delete[] Val.ByteArrayVal; + } + + PropertyValue() = default; + PropertyValue(Type T) : Ty(T) {} + + PropertyValue(uint32_t Val) : Ty(UINT32), Val({Val}) {} + PropertyValue(const byte *Data, SizeTy DataBitSize) { + constexpr int ByteSizeInBits = 8; + Ty = BYTE_ARRAY; + SizeTy DataSize = (DataBitSize + (ByteSizeInBits - 1)) / ByteSizeInBits; + constexpr size_t SizeFieldSize = sizeof(SizeTy); + + // Allocate space for size and data. + Val.ByteArrayVal = new byte[SizeFieldSize + DataSize]; + + // Write the size into first bytes. + for (size_t I = 0; I < SizeFieldSize; ++I) { + Val.ByteArrayVal[I] = (byte)DataBitSize; + DataBitSize >>= ByteSizeInBits; + } + // Append data. + std::memcpy(Val.ByteArrayVal + SizeFieldSize, Data, DataSize); + } + template + PropertyValue(const C &Data) + : PropertyValue(reinterpret_cast(Data.data()), + Data.size() * sizeof(T) * /* bits in one byte */ 8) {} + PropertyValue(const std::string_view Str) + : PropertyValue(reinterpret_cast(Str.data()), + Str.size() * sizeof(char) * /* bits in one byte */ 8) {} + PropertyValue(const PropertyValue &P) { *this = P; } + PropertyValue(PropertyValue &&P) { *this = std::move(P); } + + PropertyValue &operator=(PropertyValue &&P) { + copy(P); + + if (P.getType() == BYTE_ARRAY) + P.Val.ByteArrayVal = nullptr; + P.Ty = NONE; + return *this; + } + + PropertyValue &operator=(const PropertyValue &P) { + if (P.getType() == BYTE_ARRAY) + *this = PropertyValue(P.asByteArray(), P.getByteArraySizeInBits()); + else + copy(P); + return *this; + } + + // get property value as unsigned 32-bit integer + uint32_t asUint32() const { + if (Ty != UINT32) + throw sycl::exception(make_error_code(errc::invalid), + "Must be UINT32 value."); + return Val.UInt32Val; + } + + // Get raw data size in bits. + SizeTy getByteArraySizeInBits() const { + if (Ty != BYTE_ARRAY) + throw sycl::exception(make_error_code(errc::invalid), + "Must be BYTE_ARRAY value."); + SizeTy Res = 0; + + for (size_t I = 0; I < sizeof(SizeTy); ++I) + Res |= (SizeTy)Val.ByteArrayVal[I] << (8 * I); + return Res; + } + + // Get byte array data size in bytes. + SizeTy getByteArraySize() const { + SizeTy SizeInBits = getByteArraySizeInBits(); + constexpr unsigned int MASK = 0x7; + return ((SizeInBits + MASK) & ~MASK) / 8; + } + + // Get byte array data size in bytes, including the leading bytes encoding the + // size. + SizeTy getRawByteArraySize() const { + return getByteArraySize() + sizeof(SizeTy); + } + + // Get byte array data including the leading bytes encoding the size. + const byte *asRawByteArray() const { + if (Ty != BYTE_ARRAY) + throw sycl::exception(make_error_code(errc::invalid), + "Must be BYTE_ARRAY value."); + return Val.ByteArrayVal; + } + + // Get byte array data excluding the leading bytes encoding the size. + const byte *asByteArray() const { + if (Ty != BYTE_ARRAY) + throw sycl::exception(make_error_code(errc::invalid), + "Must be BYTE_ARRAY value."); + return Val.ByteArrayVal + sizeof(SizeTy); + } + + bool isValid() const { return getType() != NONE; } + + // set property value; the 'T' type must be convertible to a property type tag + template void set(T V) { + if (getTypeTag() != Ty) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid type tag for this operation."); + getValueRef() = V; + } + + Type getType() const { return Ty; } + + SizeTy size() const { + switch (Ty) { + case UINT32: + return sizeof(Val.UInt32Val); + case BYTE_ARRAY: + return getRawByteArraySize(); + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported property type."); + } + } + + const char *data() const { + switch (Ty) { + case UINT32: + return reinterpret_cast(&Val.UInt32Val); + case BYTE_ARRAY: + return reinterpret_cast(Val.ByteArrayVal); + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported property type."); + } + } + +private: + template T &getValueRef() { + static_assert(std::is_same_v || std::is_same_v); + if constexpr (std::is_same_v) { + return Val.UInt32Val; + } else { + return Val.ByteArrayVal; + } + } + + void copy(const PropertyValue &P) { + Ty = P.Ty; + Val = P.Val; + } + + Type Ty = NONE; + // TODO: replace this union with std::variant when uplifting to C++17 + union { + uint32_t UInt32Val; + // Holds first sizeof(size_t) bytes of size followed by actual raw data. + byte *ByteArrayVal; + } Val; +}; + +using PropertySet = std::unordered_map; + +/// A registry of property sets. Maps a property set name to its +/// content. +/// +/// The order of keys is preserved and corresponds to the order of insertion. +class PropertySetRegistry { +public: + using MapTy = std::unordered_map; + + // SYCLBIN specific property sets. + static constexpr char SYCLBIN_GLOBAL_METADATA[] = "SYCLBIN/global metadata"; + static constexpr char SYCLBIN_IR_MODULE_METADATA[] = + "SYCLBIN/ir module metadata"; + static constexpr char SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA[] = + "SYCLBIN/native device code image metadata"; + + static std::unique_ptr read(std::string_view Src) { + auto Res = std::make_unique(); + PropertySet *CurPropSet = nullptr; + + // special case when there is no property data, i.e. the resulting property + // set registry should be empty + if (Src.size() == 0) + return Res; + + size_t CurrentStart = 0; + while (CurrentStart < Src.size()) { + size_t CurrentEnd = CurrentStart; + size_t SkipChars = 0; + for (CurrentEnd = CurrentStart; CurrentEnd < Src.size(); ++CurrentEnd) { + if (Src[CurrentEnd] == '\n') { + SkipChars = 1; + break; + } + if (Src[CurrentEnd] == '\r' && CurrentEnd + 1 != Src.size() && + Src[CurrentEnd + 1] == '\n') { + SkipChars = 2; + break; + } + } + + std::string_view Line = + Src.substr(CurrentStart, CurrentEnd - CurrentStart); + CurrentStart = CurrentEnd + SkipChars; + + // see if this line starts a new property set + if (Line.front() == '[') { + // yes - parse the category (property name) + auto EndPos = Line.rfind(']'); + if (EndPos == std::string_view::npos) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid line: " + std::string{Line}); + std::string_view Category = Line.substr(1, EndPos - 1); + CurPropSet = &(*Res)[Category]; + continue; + } + if (!CurPropSet) + throw sycl::exception(make_error_code(errc::invalid), + "Property category missing."); + + auto SplitSW = [](const std::string_view &View, char C) { + std::string_view Left = View.substr(0, View.find(C)); + if (Left.size() >= View.size() - 1) + return std::make_pair(Left, std::string_view{}); + std::string_view Right = View.substr(Left.size() + 1); + return std::make_pair(Left, Right); + }; + + // parse name and type+value + auto Parts = SplitSW(Line, '='); + + if (Parts.first.empty() || Parts.second.empty()) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid property line: " + std::string{Line}); + auto TypeVal = SplitSW(Parts.second, '|'); + + if (TypeVal.first.empty() || TypeVal.second.empty()) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid property value: " + + std::string{Parts.second}); + + // parse type + int Tint = stringViewToInt(TypeVal.first); + PropertyValue::Type Ttag = PropertyValue::getTypeTag(Tint); + std::string_view Val = TypeVal.second; + + PropertyValue Prop(Ttag); + + // parse value depending on its type + switch (Ttag) { + case PropertyValue::Type::UINT32: { + Prop.set(stringViewToInt(Val)); + break; + } + case PropertyValue::Type::BYTE_ARRAY: { + std::unique_ptr DecArr = Base64::decode(Val.data(), Val.size()); + Prop.set(DecArr.release()); + break; + } + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported property type: " + + std::to_string(Tint)); + } + (*CurPropSet)[std::string{Parts.first}] = std::move(Prop); + } + + return Res; + } + + MapTy::const_iterator begin() const { return PropSetMap.begin(); } + MapTy::const_iterator end() const { return PropSetMap.end(); } + + /// Retrieves a property set with given \p Name . + PropertySet &operator[](std::string_view Name) { + return PropSetMap[std::string{Name}]; + } + /// Constant access to the underlying map. + const MapTy &getPropSets() const { return PropSetMap; } + +private: + MapTy PropSetMap; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp new file mode 100644 index 0000000000000..52c47ab24b4ba --- /dev/null +++ b/sycl/source/detail/syclbin.cpp @@ -0,0 +1,392 @@ +//==--------------------- syclbin.cpp - SYCLBIN parser ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/lib/Object/SYCLBIN.cpp. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMObject. + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +namespace { + +std::unique_ptr ContentCopy(const char *Data, size_t Size) { + std::unique_ptr Result{new char[Size]}; + std::memcpy(Result.get(), Data, Size); + return Result; +} + +// Offload binary header and entry. +constexpr uint8_t OffloadBinaryMagic[4] = {0x10, 0xFF, 0x10, 0xAD}; +struct OffloadBinaryHeaderType { + uint8_t Magic[4]; + uint32_t Version; + uint64_t Size; + uint64_t EntryOffset; + uint64_t EntrySize; +}; +struct OffloadBinaryEntryType { + uint16_t ImageKind; + uint16_t OffloadKind; + uint32_t Flags; + uint64_t StringOffset; + uint64_t NumStrings; + uint64_t ImageOffset; + uint64_t ImageSize; +}; + +class BlockReader { +protected: + BlockReader(const char *Data, size_t Size) : Data{Data}, Size{Size} {} + + void ReadSizeCheck(size_t ByteOffset, size_t ReadSize) { + if (ByteOffset + ReadSize > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected file contents size."); + } + + const char *Data = nullptr; + size_t Size = 0; +}; + +class HeaderBlockReader : public BlockReader { +public: + HeaderBlockReader(const char *Data, size_t Size) : BlockReader(Data, Size) {} + + template const HeaderT *GetHeaderPtr(size_t ByteOffset) { + ReadSizeCheck(ByteOffset, sizeof(HeaderT)); + return reinterpret_cast(Data + ByteOffset); + } +}; + +class SYCLBINByteTableBlockReader : public BlockReader { +public: + SYCLBINByteTableBlockReader(const char *Data, size_t Size) + : BlockReader(Data, Size) {} + + std::string_view GetBinaryBlob(size_t ByteOffset, uint64_t BlobSize) { + ReadSizeCheck(ByteOffset, BlobSize); + return {Data + ByteOffset, BlobSize}; + } + + std::unique_ptr GetMetadata(size_t ByteOffset, + uint64_t MetadataSize) { + return PropertySetRegistry::read(GetBinaryBlob(ByteOffset, MetadataSize)); + } +}; + +std::pair getImageInOffloadBinary(const char *Data, + size_t Size) { + if (sizeof(OffloadBinaryHeaderType) > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid Offload Binary size."); + + // Read the header. + const OffloadBinaryHeaderType *Header = + reinterpret_cast(Data); + if (memcmp(Header->Magic, OffloadBinaryMagic, 4) != 0) + throw sycl::exception(make_error_code(errc::invalid), + "Incorrect Offload Binary magic number."); + + if (Header->Version != 1) + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported Offload Binary version number."); + + if (Header->Version != 1) + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported Offload Binary version number."); + + if (Header->EntrySize != sizeof(OffloadBinaryEntryType)) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected number of offload entries."); + + if (Header->EntryOffset + sizeof(OffloadBinaryEntryType) > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid entry offset."); + + // Read the table entry. + const OffloadBinaryEntryType *Entry = + reinterpret_cast(Data + + Header->EntryOffset); + + if (Entry->ImageKind != /*IMG_SYCLBIN*/ 6) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected image type."); + + if (Entry->ImageOffset + Entry->ImageSize > Size) + throw sycl::exception(make_error_code(errc::invalid), + "Invalid image offset and size."); + + return std::make_pair(Data + Entry->ImageOffset, Entry->ImageSize); +} + +} // namespace + +SYCLBIN::SYCLBIN(const char *Data, size_t Size) { + auto [SYCLBINData, SYCLBINSize] = getImageInOffloadBinary(Data, Size); + + if (SYCLBINSize < sizeof(FileHeaderType)) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected file contents size."); + + // Read the file header. + const FileHeaderType *FileHeader = + reinterpret_cast(SYCLBINData); + if (FileHeader->Magic != MagicNumber) + throw sycl::exception(make_error_code(errc::invalid), + "Incorrect SYCLBIN magic number."); + + if (FileHeader->Version > CurrentVersion) + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported SYCLBIN version " + + std::to_string(FileHeader->Version) + "."); + Version = FileHeader->Version; + + const uint64_t AMHeaderBlockSize = + sizeof(AbstractModuleHeaderType) * FileHeader->AbstractModuleCount; + const uint64_t IRMHeaderBlockSize = + sizeof(IRModuleHeaderType) * FileHeader->IRModuleCount; + const uint64_t NDCIHeaderBlockSize = sizeof(NativeDeviceCodeImageHeaderType) * + FileHeader->NativeDeviceCodeImageCount; + const uint64_t HeaderBlockSize = sizeof(FileHeaderType) + AMHeaderBlockSize + + IRMHeaderBlockSize + NDCIHeaderBlockSize; + // Align metadata table size to 8. + const uint64_t AlignedMetadataByteTableSize = + FileHeader->MetadataByteTableSize + + (-FileHeader->MetadataByteTableSize & 7); + if (SYCLBINSize < HeaderBlockSize + AlignedMetadataByteTableSize + + FileHeader->BinaryByteTableSize) + throw sycl::exception(make_error_code(errc::invalid), + "Unexpected file contents size."); + + // Create reader objects. These help with checking out-of-bounds access. + HeaderBlockReader HeaderBlockReader{SYCLBINData, HeaderBlockSize}; + SYCLBINByteTableBlockReader MetadataByteTableBlockReader{ + SYCLBINData + HeaderBlockSize, FileHeader->MetadataByteTableSize}; + SYCLBINByteTableBlockReader BinaryByteTableBlockReader{ + SYCLBINData + HeaderBlockSize + AlignedMetadataByteTableSize, + FileHeader->BinaryByteTableSize}; + + // Read global metadata. + GlobalMetadata = MetadataByteTableBlockReader.GetMetadata( + FileHeader->GlobalMetadataOffset, FileHeader->GlobalMetadataSize); + + // Read the abstract modules. + AbstractModules.resize(FileHeader->AbstractModuleCount); + for (uint32_t I = 0; I < FileHeader->AbstractModuleCount; ++I) { + AbstractModule &AM = AbstractModules[I]; + + // Read the header for the current abstract module. + const uint64_t AMHeaderByteOffset = + sizeof(FileHeaderType) + sizeof(AbstractModuleHeaderType) * I; + const AbstractModuleHeaderType *AMHeader = + HeaderBlockReader.GetHeaderPtr( + AMHeaderByteOffset); + + // Read the metadata for the current abstract module. + AM.Metadata = MetadataByteTableBlockReader.GetMetadata( + AMHeader->MetadataOffset, AMHeader->MetadataSize); + + // Read the IR modules of the current abstract module. + AM.IRModules.resize(AMHeader->IRModuleCount); + for (uint32_t J = 0; J < AMHeader->IRModuleCount; ++J) { + IRModule &IRM = AM.IRModules[J]; + + // Read the header for the current IR module. + const uint64_t IRMHeaderByteOffset = + sizeof(FileHeaderType) + AMHeaderBlockSize + + sizeof(IRModuleHeaderType) * (AMHeader->IRModuleOffset + J); + const IRModuleHeaderType *IRMHeader = + HeaderBlockReader.GetHeaderPtr( + IRMHeaderByteOffset); + + // Read the metadata for the current IR module. + IRM.Metadata = MetadataByteTableBlockReader.GetMetadata( + IRMHeader->MetadataOffset, IRMHeader->MetadataSize); + + // Read the binary blob for the current IR module. + IRM.RawIRBytes = BinaryByteTableBlockReader.GetBinaryBlob( + IRMHeader->RawIRBytesOffset, IRMHeader->RawIRBytesSize); + } + + // Read the native device code images of the current abstract module. + AM.NativeDeviceCodeImages.resize(AMHeader->NativeDeviceCodeImageCount); + for (uint32_t J = 0; J < AMHeader->NativeDeviceCodeImageCount; ++J) { + NativeDeviceCodeImage &NDCI = AM.NativeDeviceCodeImages[J]; + + // Read the header for the current native device code image. + const uint64_t NDCIHeaderByteOffset = + sizeof(FileHeaderType) + AMHeaderBlockSize + IRMHeaderBlockSize + + sizeof(NativeDeviceCodeImageHeaderType) * + (AMHeader->NativeDeviceCodeImageOffset + J); + const NativeDeviceCodeImageHeaderType *NDCIHeader = + HeaderBlockReader.GetHeaderPtr( + NDCIHeaderByteOffset); + + // Read the metadata for the current native device code image. + NDCI.Metadata = MetadataByteTableBlockReader.GetMetadata( + NDCIHeader->MetadataOffset, NDCIHeader->MetadataSize); + + // Read the binary blob for the current native device code image. + NDCI.RawDeviceCodeImageBytes = BinaryByteTableBlockReader.GetBinaryBlob( + NDCIHeader->BinaryBytesOffset, NDCIHeader->BinaryBytesSize); + } + } +} + +SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize) + : SYCLBINContentCopy{ContentCopy(SYCLBINContent, SYCLBINSize)}, + SYCLBINContentCopySize{SYCLBINSize}, + ParsedSYCLBIN(SYCLBIN{SYCLBINContentCopy.get(), SYCLBINSize}) { + size_t NumJITBinaries = 0, NumNativeBinaries = 0; + for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { + NumJITBinaries += AM.IRModules.size(); + NumNativeBinaries += AM.NativeDeviceCodeImages.size(); + } + DeviceBinaries.reserve(NumJITBinaries + NumNativeBinaries); + JITDeviceBinaryImages.reserve(NumJITBinaries); + NativeDeviceBinaryImages.reserve(NumNativeBinaries); + + for (SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) { + // Construct properties from SYCLBIN metadata. + std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets = + convertAbstractModuleProperties(AM); + + for (SYCLBIN::IRModule &IRM : AM.IRModules) { + sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back(); + DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION; + DeviceBinary.Kind = 4; + DeviceBinary.Format = SYCL_DEVICE_BINARY_TYPE_SPIRV; // TODO: Determine. + DeviceBinary.DeviceTargetSpec = + __SYCL_DEVICE_BINARY_TARGET_SPIRV64; // TODO: Determine. + DeviceBinary.CompileOptions = nullptr; + DeviceBinary.LinkOptions = nullptr; + DeviceBinary.ManifestStart = nullptr; + DeviceBinary.ManifestEnd = nullptr; + DeviceBinary.BinaryStart = + reinterpret_cast(IRM.RawIRBytes.data()); + DeviceBinary.BinaryEnd = reinterpret_cast( + IRM.RawIRBytes.data() + IRM.RawIRBytes.size()); + DeviceBinary.EntriesBegin = nullptr; + DeviceBinary.EntriesEnd = nullptr; + DeviceBinary.PropertySetsBegin = BinPropertySets.data(); + DeviceBinary.PropertySetsEnd = + BinPropertySets.data() + BinPropertySets.size(); + // Create an image from it. + JITDeviceBinaryImages.emplace_back(&DeviceBinary); + } + + for (const SYCLBIN::NativeDeviceCodeImage &NDCI : + AM.NativeDeviceCodeImages) { + sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back(); + DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION; + DeviceBinary.Kind = 4; + DeviceBinary.Format = SYCL_DEVICE_BINARY_TYPE_NATIVE; + DeviceBinary.DeviceTargetSpec = + __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; // TODO: Determine. + DeviceBinary.CompileOptions = nullptr; + DeviceBinary.LinkOptions = nullptr; + DeviceBinary.ManifestStart = nullptr; + DeviceBinary.ManifestEnd = nullptr; + DeviceBinary.BinaryStart = reinterpret_cast( + NDCI.RawDeviceCodeImageBytes.data()); + DeviceBinary.BinaryEnd = reinterpret_cast( + NDCI.RawDeviceCodeImageBytes.data() + + NDCI.RawDeviceCodeImageBytes.size()); + DeviceBinary.EntriesBegin = nullptr; + DeviceBinary.EntriesEnd = nullptr; + DeviceBinary.PropertySetsBegin = BinPropertySets.data(); + DeviceBinary.PropertySetsEnd = + BinPropertySets.data() + BinPropertySets.size(); + // Create an image from it. + NativeDeviceBinaryImages.emplace_back(&DeviceBinary); + } + } +} + +std::vector<_sycl_device_binary_property_set_struct> & +SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) { + std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets = + BinaryPropertySets.emplace_back(); + BinPropertySets.reserve(AM.Metadata->getPropSets().size()); + for (auto &PropSetIt : *AM.Metadata) { + auto &PropSetName = PropSetIt.first; + auto &PropSetVal = PropSetIt.second; + + // Add a new vector to BinaryProperties and reserve room for all the + // properties we are converting. + std::vector<_sycl_device_binary_property_struct> &PropsList = + BinaryProperties.emplace_back(); + PropsList.reserve(PropSetVal.size()); + + // Then convert all properties in the property set. + for (auto &PropIt : PropSetVal) { + auto &PropName = PropIt.first; + auto &PropVal = PropIt.second; + _sycl_device_binary_property_struct &BinProp = PropsList.emplace_back(); + BinProp.Name = const_cast(PropName.data()); + BinProp.Type = PropVal.getType(); + if (BinProp.Type == SYCL_PROPERTY_TYPE_UINT32) { + // UINT32 properties have their value stored in the size instead. + BinProp.ValAddr = nullptr; + std::memcpy(&BinProp.ValSize, PropVal.data(), sizeof(uint32_t)); + } else { + BinProp.ValAddr = const_cast(PropVal.data()); + BinProp.ValSize = PropVal.size(); + } + } + + // Add a new property set to the list. + _sycl_device_binary_property_set_struct &BinPropSet = + BinPropertySets.emplace_back(); + BinPropSet.Name = const_cast(PropSetName.data()); + BinPropSet.PropertiesBegin = PropsList.data(); + BinPropSet.PropertiesEnd = PropsList.data() + PropsList.size(); + } + return BinPropertySets; +} + +std::vector +SYCLBINBinaries::getBestCompatibleImages(const device &Dev) { + auto SelectCompatibleImages = + [&](const std::vector &Imgs) { + std::vector CompatImgs; + for (const RTDeviceBinaryImage &Img : Imgs) + if (doesDevSupportDeviceRequirements(Dev, Img) && + doesImageTargetMatchDevice(Img, getSyclObjImpl(Dev).get())) + CompatImgs.push_back(&Img); + return CompatImgs; + }; + + // Try with native images first. + std::vector NativeImgs = + SelectCompatibleImages(NativeDeviceBinaryImages); + if (!NativeImgs.empty()) + return NativeImgs; + + // If there were no native images, pick JIT images. + return SelectCompatibleImages(JITDeviceBinaryImages); +} + +std::vector +SYCLBINBinaries::getBestCompatibleImages(const std::vector &Devs) { + std::set Images; + for (const device &Dev : Devs) { + std::vector BestImagesForDev = + getBestCompatibleImages(Dev); + Images.insert(BestImagesForDev.cbegin(), BestImagesForDev.cend()); + } + return {Images.cbegin(), Images.cend()}; +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/syclbin.hpp b/sycl/source/detail/syclbin.hpp new file mode 100644 index 0000000000000..312162372938b --- /dev/null +++ b/sycl/source/detail/syclbin.hpp @@ -0,0 +1,161 @@ +//==--------------------- syclbin.hpp - SYCLBIN parser ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Adjusted copy of llvm/include/llvm/Object/SYCLBIN.h. +// TODO: Remove once we can consistently link the SYCL runtime library with +// LLVMObject. + +#pragma once + +#include "detail/compiler.hpp" +#include "detail/device_binary_image.hpp" +#include "detail/property_set_io.hpp" +#include "sycl/exception.hpp" + +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +class device; + +namespace detail { + +// Representation of a SYCLBIN binary object. This is intended for use as an +// image inside a OffloadBinary. +// Adjusted from llvm/include/llvm/Object/SYCLBIN.h and can be removed if +// LLVMObject gets linked into the SYCL runtime library. +class SYCLBIN { +public: + SYCLBIN(const char *Data, size_t Size); + + SYCLBIN(const SYCLBIN &Other) = delete; + SYCLBIN(SYCLBIN &&Other) = default; + + SYCLBIN &operator=(const SYCLBIN &Other) = delete; + SYCLBIN &operator=(SYCLBIN &&Other) = default; + + /// The current version of the binary used for backwards compatibility. + static constexpr uint32_t CurrentVersion = 1; + + /// Magic number used to identify SYCLBIN files. + static constexpr uint32_t MagicNumber = 0x53594249; + + struct IRModule { + std::unique_ptr Metadata; + std::string_view RawIRBytes; + }; + struct NativeDeviceCodeImage { + std::unique_ptr Metadata; + std::string_view RawDeviceCodeImageBytes; + }; + + struct AbstractModule { + std::unique_ptr Metadata; + std::vector IRModules; + std::vector NativeDeviceCodeImages; + }; + + uint32_t Version; + std::unique_ptr GlobalMetadata; + std::vector AbstractModules; + +private: + struct alignas(8) FileHeaderType { + uint32_t Magic; + uint32_t Version; + uint32_t AbstractModuleCount; + uint32_t IRModuleCount; + uint32_t NativeDeviceCodeImageCount; + uint64_t MetadataByteTableSize; + uint64_t BinaryByteTableSize; + uint64_t GlobalMetadataOffset; + uint64_t GlobalMetadataSize; + }; + + struct alignas(8) AbstractModuleHeaderType { + uint64_t MetadataOffset; + uint64_t MetadataSize; + uint32_t IRModuleCount; + uint32_t IRModuleOffset; + uint32_t NativeDeviceCodeImageCount; + uint32_t NativeDeviceCodeImageOffset; + }; + + struct alignas(8) IRModuleHeaderType { + uint64_t MetadataOffset; + uint64_t MetadataSize; + uint64_t RawIRBytesOffset; + uint64_t RawIRBytesSize; + }; + + struct alignas(8) NativeDeviceCodeImageHeaderType { + uint64_t MetadataOffset; + uint64_t MetadataSize; + uint64_t BinaryBytesOffset; + uint64_t BinaryBytesSize; + }; +}; + +// Helper class for managing both a SYCLBIN and binaries created from it, +// allowing existing infrastructure to better understand the contents of the +// SYCLBINs. +struct SYCLBINBinaries { + // Delete copy-ctor to keep binaries unique and avoid costly copies of a + // heavy structure. + SYCLBINBinaries(const SYCLBINBinaries &) = delete; + SYCLBINBinaries &operator=(const SYCLBINBinaries &) = delete; + + SYCLBINBinaries(SYCLBINBinaries &&) = default; + SYCLBINBinaries &operator=(SYCLBINBinaries &&) = default; + + SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize); + + std::vector + getBestCompatibleImages(const device &Dev); + std::vector + getBestCompatibleImages(const std::vector &Dev); + + uint8_t getState() const noexcept { + PropertySet &GlobalMetadata = + (*ParsedSYCLBIN + .GlobalMetadata)[PropertySetRegistry::SYCLBIN_GLOBAL_METADATA]; + return static_cast( + GlobalMetadata[PropertySet::key_type{"state"}].asUint32()); + } + +private: + std::vector<_sycl_offload_entry_struct> & + convertAbstractModuleEntries(const SYCLBIN::AbstractModule &AM); + + std::vector<_sycl_device_binary_property_set_struct> & + convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM); + + std::unique_ptr SYCLBINContentCopy = nullptr; + size_t SYCLBINContentCopySize = 0; + + SYCLBIN ParsedSYCLBIN; + + // Buffers for holding entries in the binary structs alive. + std::vector> BinaryOffloadEntries; + std::vector> + BinaryProperties; + std::vector> + BinaryPropertySets; + + std::vector DeviceBinaries; + std::vector JITDeviceBinaryImages; + std::vector NativeDeviceBinaryImages; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index ab12ff67e8590..a0861190815ec 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -212,6 +212,12 @@ get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, State); } +detail::KernelBundleImplPtr +get_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, + const sycl::span &Bytes, bundle_state State) { + return std::make_shared(Ctx, Devs, Bytes, State); +} + detail::KernelBundleImplPtr get_empty_interop_kernel_bundle_impl(const context &Ctx, const std::vector &Devs) { diff --git a/sycl/test-e2e/SYCLBIN/Inputs/basic.hpp b/sycl/test-e2e/SYCLBIN/Inputs/basic.hpp new file mode 100644 index 0000000000000..96d793ee3aadd --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/basic.hpp @@ -0,0 +1,47 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 1024; +static constexpr size_t WGSIZE = 16; +static constexpr float EPS = 0.001; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#endif + + assert(KBExe.ext_oneapi_has_kernel("iota")); + sycl::kernel IotaKern = KBExe.ext_oneapi_get_kernel("iota"); + + float *Ptr = sycl::malloc_shared(NUM, Q); + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(3.14f, Ptr); + CGH.parallel_for(sycl::nd_range{{NUM}, {WGSIZE}}, IotaKern); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + const float Truth = 3.14f + static_cast(I); + if (std::abs(Ptr[I] - Truth) > EPS) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp new file mode 100644 index 0000000000000..0f9a04ae3762c --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/basic_kernel.cpp @@ -0,0 +1,10 @@ +#include + +namespace syclexp = sycl::ext::oneapi::experimental; +namespace syclext = sycl::ext::oneapi; + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclexp::nd_range_kernel<1>)) void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/common.hpp b/sycl/test-e2e/SYCLBIN/Inputs/common.hpp new file mode 100644 index 0000000000000..f302672a1c717 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/common.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; +namespace syclext = sycl::ext::oneapi; + +#if !defined(SYCLBIN_INPUT_STATE) && !defined(SYCLBIN_OBJECT_STATE) && \ + !defined(SYCLBIN_EXECUTABLE_STATE) +#error "SYCLBIN state define missing!" +#endif + +template +constexpr std::string_view GetStateName() { + if constexpr (InvalidState == sycl::bundle_state::input) + return "input"; + else if constexpr (InvalidState == sycl::bundle_state::object) + return "object"; + else + return "executable"; +} + +template +int ExpectExceptionInvalidState(const sycl::context &Ctx, const char *File) { + try { + syclexp::get_kernel_bundle(Ctx, std::string{File}); + std::cout << "Unexpectedly created a kernel bundle for invalid state: " + << GetStateName() << std::endl; + return 1; + } catch (sycl::exception &) { + } + return 0; +} + +// SYCLBIN is only directly loadable in the state they were produced in, so +// we run checks to ensure other states will complain. +int CommonLoadCheck(const sycl::context &Ctx, const char *File) { + int Failed = 0; + +#ifndef SYCLBIN_INPUT_STATE + Failed += ExpectExceptionInvalidState(Ctx, File); +#endif +#ifndef SYCLBIN_OBJECT_STATE + Failed += ExpectExceptionInvalidState(Ctx, File); +#endif +#ifndef SYCLBIN_EXECUTABLE_STATE + Failed += + ExpectExceptionInvalidState(Ctx, File); +#endif + + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp b/sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp new file mode 100644 index 0000000000000..708444cac10e9 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/exporting_function.cpp @@ -0,0 +1,6 @@ +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp new file mode 100644 index 0000000000000..bb113590b15ee --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/importing_kernel.cpp @@ -0,0 +1,11 @@ + +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental:: + single_task_kernel)) void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/link.hpp b/sycl/test-e2e/SYCLBIN/Inputs/link.hpp new file mode 100644 index 0000000000000..ce14379c4e19a --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/link.hpp @@ -0,0 +1,60 @@ +#include "common.hpp" + +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +static constexpr size_t NUM = 10; + +int main(int argc, char *argv[]) { + assert(argc == 3); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]) + + CommonLoadCheck(Q.get_context(), argv[2]); + + // Load SYCLBINs. +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput1 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBInput2 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[2]}); + + // Compile the bundles. + auto KBObj1 = sycl::compile(KBInput1); + auto KBObj2 = sycl::compile(KBInput2); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj1 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBObj2 = syclex::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[2]}); +#else // defined(SYCLBIN_EXECUTABLE_STATE) +#error "Test does not work with executable state." +#endif + + // Link the bundles. + auto KBExe = sycl::link({KBObj1, KBObj2}); + + // TestKernel1 does not have any requirements, so should be there always. + assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); + sycl::kernel TestKernel1 = KBExe.ext_oneapi_get_kernel("TestKernel1"); + + int *Ptr = sycl::malloc_shared(NUM, Q); + Q.fill(Ptr, int{0}, NUM).wait_and_throw(); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel1); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != I) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp b/sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp new file mode 100644 index 0000000000000..f8f41d5ad25ed --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/link_rtc.hpp @@ -0,0 +1,79 @@ +#include "common.hpp" + +#include + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +static constexpr size_t NUM = 10; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclexp::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + + // Load SYCLBIN and compile it. +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBSYCLBINObj = sycl::compile(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBSYCLBINObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#else // defined(SYCLBIN_EXECUTABLE_STATE) +#error "Test does not work with executable state." +#endif + + // Compile source kernel bundle. + auto KBSrc = syclexp::create_kernel_bundle_from_source( + Q.get_context(), syclexp::source_language::sycl, SYCLSource); + syclexp::properties BuildOpts{ + syclexp::build_options{"-fsycl-allow-device-image-dependencies"}}; + auto KBSrcObj = syclexp::compile(KBSrc, BuildOpts); + + // Link the bundles. + auto KBExe = sycl::link({KBSYCLBINObj, KBSrcObj}); + + // TestKernel1 does not have any requirements, so should be there always. + assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); + sycl::kernel TestKernel1 = KBExe.ext_oneapi_get_kernel("TestKernel1"); + + int *Ptr = sycl::malloc_shared(NUM, Q); + Q.fill(Ptr, int{0}, NUM).wait_and_throw(); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel1); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != I) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py new file mode 100644 index 0000000000000..41588acc03e40 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/lit.cfg.py @@ -0,0 +1,3 @@ +import lit + +config.suffixes = [] # Skip all files in this folder. diff --git a/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp new file mode 100644 index 0000000000000..15b20a48a8c1b --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.cpp @@ -0,0 +1,15 @@ +#include + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental:: + single_task_kernel)) void TestKernel1(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental:: + single_task_kernel)) void TestKernel2(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = static_cast(static_cast(I) / 2.0); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp new file mode 100644 index 0000000000000..e05cdd66e1a7f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/optional_kernel_features.hpp @@ -0,0 +1,68 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 10; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBExe = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); + auto KBExe = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe = syclexp::get_kernel_bundle( + Q.get_context(), {Q.get_device()}, std::string{argv[1]}); +#endif + + // TestKernel1 does not have any requirements, so should be there always. + assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); + sycl::kernel TestKernel1 = KBExe.ext_oneapi_get_kernel("TestKernel1"); + + int *Ptr = sycl::malloc_shared(NUM, Q); + Q.fill(Ptr, int{0}, NUM).wait_and_throw(); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel1); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != I) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + + // TestKernel2 should only be there if the device supports fp64. + if (Q.get_device().has(sycl::aspect::fp64)) { + assert(KBExe.ext_oneapi_has_kernel("TestKernel2")); + sycl::kernel TestKernel2 = KBExe.ext_oneapi_get_kernel("TestKernel2"); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Ptr, int{NUM}); + CGH.single_task(TestKernel2); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + if (Ptr[I] != static_cast(static_cast(I) / 2.0)) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + } else { + assert(!KBExe.ext_oneapi_has_kernel("TestKernel2")); + } + + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/basic_executable.cpp b/sycl/test-e2e/SYCLBIN/basic_executable.cpp new file mode 100644 index 0000000000000..cfdb3ebf33039 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/basic_executable.cpp @@ -0,0 +1,25 @@ +//==--------- basic_executable.cpp --- SYCLBIN extension tests -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in executable +// -- state. + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/basic.hpp" diff --git a/sycl/test-e2e/SYCLBIN/basic_input.cpp b/sycl/test-e2e/SYCLBIN/basic_input.cpp new file mode 100644 index 0000000000000..892b4096b2f83 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/basic_input.cpp @@ -0,0 +1,25 @@ +//==--------- basic_input.cpp --- SYCLBIN extension tests ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input +// -- state. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/basic.hpp" diff --git a/sycl/test-e2e/SYCLBIN/basic_object.cpp b/sycl/test-e2e/SYCLBIN/basic_object.cpp new file mode 100644 index 0000000000000..91a04fc3eac55 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/basic_object.cpp @@ -0,0 +1,25 @@ +//==--------- basic_object.cpp --- SYCLBIN extension tests -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object +// -- state. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/basic_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/basic.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_input.cpp b/sycl/test-e2e/SYCLBIN/link_input.cpp new file mode 100644 index 0000000000000..928d05e08c69b --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_input.cpp @@ -0,0 +1,25 @@ +//==-------------- link_input.cpp --- SYCLBIN extension tests --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_shared_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Test for linking two SYCLBIN kernel_bundle. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/link.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_object.cpp b/sycl/test-e2e/SYCLBIN/link_object.cpp new file mode 100644 index 0000000000000..db726bf62e104 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_object.cpp @@ -0,0 +1,25 @@ +//==-------------- link_input.cpp --- SYCLBIN extension tests --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_shared_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Test for linking two SYCLBIN kernel_bundle. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.export.syclbin %t.import.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/link.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp new file mode 100644 index 0000000000000..dcf84def5bcf5 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_rtc_input.cpp @@ -0,0 +1,24 @@ +//==------------ link_rtc_input.cpp --- SYCLBIN extension tests ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where one kernel is runtime-compiled and one is compiled +// -- to SYCLBIN. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/link_rtc.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp new file mode 100644 index 0000000000000..3b0a073f4537f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_rtc_object.cpp @@ -0,0 +1,24 @@ +//==------------ link_rtc_object.cpp --- SYCLBIN extension tests -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where one kernel is runtime-compiled and one is compiled +// -- to SYCLBIN. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/link_rtc.hpp" diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp new file mode 100644 index 0000000000000..b36d5957b36c9 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_executable.cpp @@ -0,0 +1,25 @@ +//==- optional_kernel_features_executable.cpp --- SYCLBIN extension tests --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing +// the use of optional kernel features. + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/optional_kernel_features.hpp" diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp new file mode 100644 index 0000000000000..d0df88a94bbe9 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp @@ -0,0 +1,25 @@ +//==--- optional_kernel_features_input.cpp --- SYCLBIN extension tests -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing +// the use of optional kernel features. + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/optional_kernel_features.hpp" diff --git a/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp new file mode 100644 index 0000000000000..43ae03b254c42 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/optional_kernel_features_object.cpp @@ -0,0 +1,26 @@ +//==--- optional_kernel_features_object.cpp --- SYCLBIN extension tests +//-----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// HIP and CUDA cannot answer kernel name queries on the binaries, so kernel +// names cannot be resolved for now. +// XFAIL: cuda || hip +// XFAIL-TRACKER: CMPLRLLVM-68469 + +// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing +// the use of optional kernel features. + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/optional_kernel_features.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/optional_kernel_features.hpp" diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index d64837ee4e1ec..bb05e3c1d36f5 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3324,6 +3324,7 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE +_ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKNS0_4spanIcLm18446744073709551615EEENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE _ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f675fd68af594..db941c5343b6f 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4167,6 +4167,7 @@ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index c913f0d6bcaa6..de2b939756ea0 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 20 +// CHECK-NUM-MATCHES: 25 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see