Skip to content

[SYCL] Implement loading SYCLBIN into kernel_bundle #18949

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//==---- syclbin_kernel_bundle.hpp - SYCLBIN-based kernel_bundle tooling ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/kernel_bundle.hpp>

#include <fstream>
#include <string>

#if __has_include(<filesystem>)
#include <filesystem>
#endif
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't these need an #else ? What will happen if the compiler doesn't have filesystem or span? IIRC, we are still trying to support GCC 7.5 whose support of C++17 filesystem feature is not complete. Take a look at what is done in os_util.cpp, where the fallback includes experimental/filesystem.

Also, std::span is C++20, iirc. But we have sycl::span , so maybe use that?

OR if I'm operating on yesterdays information and this is no longer a problem, let me know.


#if __has_include(<span>)
#include <span>
#endif

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

template <bundle_state State, typename PropertyListT = empty_properties_t>
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs,
const sycl::span<char> &Bytes, PropertyListT = {}) {
std::vector<device> UniqueDevices =
sycl::detail::removeDuplicateDevices(Devs);

sycl::detail::KernelBundleImplPtr Impl =
sycl::detail::get_kernel_bundle_impl(Ctxt, UniqueDevices, Bytes, State);
return sycl::detail::createSyclObjFromImpl<kernel_bundle<State>>(Impl);
}

#if __cpp_lib_span
template <bundle_state State, typename PropertyListT = empty_properties_t>
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs,
const std::span<char> &Bytes, PropertyListT Props = {}) {
return experimental::get_kernel_bundle(
Ctxt, Devs, sycl::span<char>(Bytes.data(), Bytes.size()), Props);
}
#endif

#if __cpp_lib_filesystem
template <bundle_state State, typename PropertyListT = empty_properties_t>
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs,
const std::filesystem::path &Filename,
PropertyListT Props = {}) {
std::vector<char> RawSYCLBINData;
{
std::ifstream FileStream{Filename, std::ios::binary};
if (!FileStream.is_open())
throw sycl::exception(make_error_code(errc::invalid),
"Failed to open SYCLBIN file: " +
Filename.string());
RawSYCLBINData =
std::vector<char>{std::istreambuf_iterator<char>(FileStream),
std::istreambuf_iterator<char>()};
}
return experimental::get_kernel_bundle<State>(
Ctxt, Devs, sycl::span<char>{RawSYCLBINData}, Props);
}

template <bundle_state State, typename PropertyListT = empty_properties_t>
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename,
PropertyListT Props = {}) {
return experimental::get_kernel_bundle<State>(Ctxt, Ctxt.get_devices(),
Filename, Props);
}
#endif

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
7 changes: 6 additions & 1 deletion sycl/include/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
#include <sycl/kernel.hpp> // for kernel, kernel_bundle
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
#include <sycl/property_list.hpp> // for property_list
#include <ur_api.h> // for ur_native_handle_t
#include <sycl/sycl_span.hpp>
#include <ur_api.h>

#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp> // PropertyT
Expand Down Expand Up @@ -639,6 +640,10 @@ __SYCL_EXPORT detail::KernelBundleImplPtr
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
bundle_state State);

__SYCL_EXPORT detail::KernelBundleImplPtr
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
const sycl::span<char> &Bytes, bundle_state State);

__SYCL_EXPORT const std::vector<device>
removeDuplicateDevices(const std::vector<device> &Devs);

Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
#include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp>
#include <sycl/ext/oneapi/experimental/reduction_properties.hpp>
#include <sycl/ext/oneapi/experimental/root_group.hpp>
#include <sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp>
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
#include <sycl/ext/oneapi/filter_selector.hpp>
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ set(SYCL_COMMON_SOURCES
"detail/reduction.cpp"
"detail/sampler_impl.cpp"
"detail/stream_impl.cpp"
"detail/syclbin.cpp"
"detail/scheduler/commands.cpp"
"detail/scheduler/leaves_collection.cpp"
"detail/scheduler/scheduler.cpp"
Expand Down
121 changes: 121 additions & 0 deletions sycl/source/detail/base64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Adjusted copy of llvm/include/llvm/Support/Base64.h.
// TODO: Remove once we can consistently link the SYCL runtime library with
// LLVMSupport.

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace sycl {
inline namespace _V1 {
namespace detail {

class Base64 {
private:
// Decode a single character.
static inline int decode(char Ch) {
if (Ch >= 'A' && Ch <= 'Z') // 0..25
return Ch - 'A';
else if (Ch >= 'a' && Ch <= 'z') // 26..51
return Ch - 'a' + 26;
else if (Ch >= '0' && Ch <= '9') // 52..61
return Ch - '0' + 52;
else if (Ch == '+') // 62
return 62;
else if (Ch == '/') // 63
return 63;
return -1;
}

// Decode a quadruple of characters.
static inline void decode4(const char *Src, byte *Dst) {
int BadCh = -1;

for (auto I = 0; I < 4; ++I) {
char Ch = Src[I];
int Byte = decode(Ch);

if (Byte < 0) {
BadCh = Ch;
break;
}
Dst[I] = (byte)Byte;
}
if (BadCh != -1)
throw sycl::exception(make_error_code(errc::invalid),
"Invalid char in base 64 encoding.");
}

public:
using byte = uint8_t;

// Get the size of the encoded byte sequence of given size.
static size_t getDecodedSize(size_t SrcSize) { return (SrcSize * 3 + 3) / 4; }

// Decode a sequence of given size into a pre-allocated memory.
// Returns the number of bytes in the decoded result or 0 in case of error.
static size_t decode(const char *Src, byte *Dst, size_t SrcSize) {
size_t SrcOff = 0;
size_t DstOff = 0;

// decode full quads
for (size_t Qch = 0; Qch < SrcSize / 4; ++Qch, SrcOff += 4, DstOff += 3) {
byte Ch[4] = {0, 0, 0, 0};
decode4(Src + SrcOff, Ch);

// each quad of chars produces three bytes of output
Dst[DstOff + 0] = Ch[0] | (Ch[1] << 6);
Dst[DstOff + 1] = (Ch[1] >> 2) | (Ch[2] << 4);
Dst[DstOff + 2] = (Ch[2] >> 4) | (Ch[3] << 2);
}
auto RemChars = SrcSize - SrcOff;

if (RemChars == 0)
return DstOff;
// decode the remainder; variants:
// 2 chars remain - produces single byte
// 3 chars remain - produces two bytes

if (RemChars != 2 && RemChars != 3)
throw sycl::exception(make_error_code(errc::invalid),
"Invalid encoded sequence length.");

int Ch0 = decode(Src[SrcOff++]);
int Ch1 = decode(Src[SrcOff++]);
int Ch2 = RemChars == 3 ? decode(Src[SrcOff]) : 0;

if (Ch0 < 0 || Ch1 < 0 || Ch2 < 0)
throw sycl::exception(
make_error_code(errc::invalid),
"Invalid characters in the encoded sequence remainder.");
Dst[DstOff++] = Ch0 | (Ch1 << 6);

if (RemChars == 3)
Dst[DstOff++] = (Ch1 >> 2) | (Ch2 << 4);
return DstOff;
}

// Allocate minimum required amount of memory and decode a sequence of given
// size into it.
// Returns the decoded result. The size can be obtained via getDecodedSize.
static std::unique_ptr<byte[]> decode(const char *Src, size_t SrcSize) {
size_t DstSize = getDecodedSize(SrcSize);
std::unique_ptr<byte[]> Dst(new byte[DstSize]);
decode(Src, Dst.get(), SrcSize);
return Dst;
}
};

} // namespace detail
} // namespace _V1
} // namespace sycl
32 changes: 14 additions & 18 deletions sycl/source/detail/device_binary_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ RTDeviceBinaryImage::getProperty(const char *PropName) const {
return *It;
}

void RTDeviceBinaryImage::init(sycl_device_binary Bin) {
RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) {
ImageId = ImageCounter++;

// If there was no binary, we let the owner handle initialization as they see
Expand Down Expand Up @@ -227,12 +227,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() {
Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN;
}

DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
std::unique_ptr<char[], std::function<void(void *)>> &&DataPtr,
size_t DataSize)
: DynRTDeviceBinaryImage() {
Data = std::move(DataPtr);
Bin->BinaryStart = reinterpret_cast<unsigned char *>(Data.get());
std::unique_ptr<sycl_device_binary_struct> CreateDefaultDynBinary(
const std::unique_ptr<char[], std::function<void(void *)>> &DataPtr,
size_t DataSize) {
auto Bin = std::make_unique<sycl_device_binary_struct>();
Bin->BinaryStart = reinterpret_cast<unsigned char *>(DataPtr.get());
Bin->BinaryEnd = Bin->BinaryStart + DataSize;
Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize);
switch (Bin->Format) {
Expand All @@ -242,9 +241,15 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
default:
Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN;
}
init(Bin);
return Bin;
}

DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
std::unique_ptr<char[], std::function<void(void *)>> &&DataPtr,
size_t DataSize)
: RTDeviceBinaryImage(CreateDefaultDynBinary(DataPtr, DataSize).release()),
Data{std::move(DataPtr)} {}

DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() {
delete Bin;
Bin = nullptr;
Expand Down Expand Up @@ -479,8 +484,6 @@ static void copyProperty(sycl_device_binary_property &NextFreeProperty,
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
const std::vector<const RTDeviceBinaryImage *> &Imgs)
: DynRTDeviceBinaryImage() {
init(nullptr);

// Naive merges.
auto MergedSpecConstants =
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
Expand Down Expand Up @@ -675,18 +678,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
#ifdef SYCL_RT_ZSTD_AVAILABLE
CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage(
sycl_device_binary CompressedBin)
: RTDeviceBinaryImage() {

// 'CompressedBin' is part of the executable image loaded into memory
// which can't be modified easily. So, we need to make a copy of it.
Bin = new sycl_device_binary_struct(*CompressedBin);

: RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) {
// Get the decompressed size of the binary image.
m_ImageSize = ZSTDCompressor::GetDecompressedSize(
reinterpret_cast<const char *>(Bin->BinaryStart),
static_cast<size_t>(Bin->BinaryEnd - Bin->BinaryStart));

init(Bin);
}

void CompressedRTDeviceBinaryImage::Decompress() {
Expand Down
4 changes: 1 addition & 3 deletions sycl/source/detail/device_binary_image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class RTDeviceBinaryImage {

public:
RTDeviceBinaryImage() : Bin(nullptr) {}
RTDeviceBinaryImage(sycl_device_binary Bin) { init(Bin); }
RTDeviceBinaryImage(sycl_device_binary Bin);
// Explicitly delete copy constructor/operator= to avoid unintentional copies
RTDeviceBinaryImage(const RTDeviceBinaryImage &) = delete;
RTDeviceBinaryImage &operator=(const RTDeviceBinaryImage &) = delete;
Expand Down Expand Up @@ -247,8 +247,6 @@ class RTDeviceBinaryImage {
}

protected:
void init();
void init(sycl_device_binary Bin);
sycl_device_binary get() const { return Bin; }

sycl_device_binary Bin;
Expand Down
Loading