Skip to content

[OFFLOAD][OPENMP] 6.0 compatible interop interface #143491

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 144 additions & 5 deletions offload/include/OpenMP/InteropAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,70 @@

#include "omp.h"

#include "PerThreadTable.h"
#include "omptarget.h"

extern "C" {

/// Interop object kinds known to the runtime. Only the first enumerator has
/// an explicit value (-1); the remaining ones are numbered implicitly, so
/// their order in this list determines their values.
typedef enum kmp_interop_type_t {
kmp_interop_type_unknown = -1,
kmp_interop_type_platform,
kmp_interop_type_device,
kmp_interop_type_tasksync,
kmp_interop_type_target,
kmp_interop_type_targetsync,
} kmp_interop_type_t;

/// Common interop attributes, packed into bit-fields.
struct interop_attrs_t {
  bool inorder : 1;
  int reserved : 31;

  /// Check these attributes against the \p supported set. An attribute may
  /// be set here only if it is also set in \p supported; an unsupported
  /// attribute has to be cleared. `inorder` is the only attribute examined.
  bool checkSupportedOnly(interop_attrs_t supported) const {
    // A supported attribute accepts either setting.
    if (supported.inorder)
      return true;
    // Not supported: compatible only if it was not requested.
    return !inorder;
  }
};

/// One interop specification: a foreign-runtime id together with the common
/// and the implementation-specific attributes that go with it.
struct interop_spec_t {
int32_t fr_id; // presumably an omp_foreign_runtime_id_t value — TODO confirm
interop_attrs_t attrs; // Common attributes
int64_t impl_attrs; // Implementation specific attributes (recognized by each
// plugin)
};

/// Bit-field flags describing how an interop object was requested.
struct interop_flags_t {
bool implicit : 1; // dispatch (true) or interop (false)
bool nowait : 1; // has nowait flag
int reserved : 30; // reserved bits (1 + 1 + 30 = 32 bits declared in total)
};

/// Interface version, request flags and a thread id grouped into one record.
struct interop_ctx_t {
uint16_t version; // version of the interface (current is 0)
interop_flags_t flags;
int gtid; // presumably the global thread id of the requesting thread — TODO confirm
};

/// Bundle of two dependence lists, each stored as a count plus a pointer to
/// kmp_depend_info_t entries.
// NOTE(review): presumably ndeps / ndeps_noalias are the lengths of deplist /
// noalias_deplist respectively — confirm against the callers.
struct dep_pack_t {
int32_t ndeps;
kmp_depend_info_t *deplist;
int32_t ndeps_noalias;
kmp_depend_info_t *noalias_deplist;
};

struct omp_interop_val_t;

/// Signature of a callback: it receives the interop object and the opaque
/// pointer that was stored alongside the callback.
typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data);

/// A callback paired with the opaque data to hand back when it is invoked.
struct omp_interop_cb_instance_t {
  ompx_interop_cb_t *cb;
  void *data;

  omp_interop_cb_instance_t(ompx_interop_cb_t *Callback, void *UserData)
      : cb{Callback}, data{UserData} {}

  /// Invoke the stored callback on \p interop, forwarding the stored data.
  void operator()(omp_interop_val_t *interop) { (*cb)(interop, data); }
};

/// The interop value type, aka. the interop object.
typedef struct omp_interop_val_t {
/// Device and interop-type are determined at construction time and fixed.
Expand All @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t {
__tgt_device_info device_info;
const kmp_interop_type_t interop_type;
const intptr_t device_id;
const omp_foreign_runtime_ids_t vendor_id = cuda;
const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
omp_vendor_id_t vendor_id = omp_vendor_llvm;
omp_foreign_runtime_id_t fr_id = omp_fr_none;
interop_attrs_t attrs{false, 0}; // Common prefer specification attributes
int64_t impl_attrs = 0; // Implementation prefer specification attributes

void *RTLProperty = nullptr; // Plugin dependent information
// For implicitly created interop objects (e.g., from a dispatch construct):
// the gtid of the thread that owns the object, or -1 if it has no owner.
int OwnerGtid = -1;
// Marks whether the object was requested since the last time it was synced
bool Clean = true;

typedef llvm::SmallVector<omp_interop_cb_instance_t> callback_list_t;

callback_list_t CompletionCbs;

/// Put the object back into its unowned, clean state and drop any pending
/// completion callbacks without running them.
void reset() {
  clearCompletionCbs();
  markClean();
  OwnerGtid = -1;
}

bool hasOwner() const { return OwnerGtid != -1; }

void setOwner(int gtid) { OwnerGtid = gtid; }
bool isOwnedBy(int gtid) { return OwnerGtid == gtid; }
// Compatibility checks against a requested interop type and specification;
// the second overload additionally takes a device number and a gtid.
// Both are only declared here; their definitions are not visible in this
// header.
bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec);
bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec,
int64_t DeviceNum, int gtid);
// Clean flag: markClean() sets it, markDirty() clears it, isClean() reads it.
void markClean() { Clean = true; }
void markDirty() { Clean = false; }
bool isClean() const { return Clean; }

// Operations taking an explicit device. Declared only.
// NOTE(review): these use snake_case (sync_barrier/async_barrier) while the
// device-less overloads below use camelCase (syncBarrier/asyncBarrier) —
// confirm whether the naming difference is intentional.
int32_t flush(DeviceTy &Device);
int32_t sync_barrier(DeviceTy &Device);
int32_t async_barrier(DeviceTy &Device);
int32_t release(DeviceTy &Device);

// Device-less counterparts. Declared only; presumably they resolve the
// device from this object before forwarding — TODO confirm.
int32_t flush();
int32_t syncBarrier();
int32_t asyncBarrier();
int32_t release();

/// Append a completion callback; \p data is stored with it and handed back
/// to \p cb when the callback is invoked.
void addCompletionCb(ompx_interop_cb_t *cb, void *data) {
  omp_interop_cb_instance_t Pending(cb, data);
  CompletionCbs.push_back(Pending);
}

/// Number of completion callbacks currently stored.
int numCompletionCbs() const {
  return static_cast<int>(CompletionCbs.size());
}
/// Drop all stored completion callbacks without invoking them.
void clearCompletionCbs() {
  CompletionCbs.clear();
}

void runCompletionCbs() {
for (auto &cbInstance : CompletionCbs)
cbInstance(this);
clearCompletionCbs();
}
} omp_interop_val_t;

} // extern "C"

/// Per-thread container of interop object pointers. It offers the small
/// vector-like interface (add / erase / begin / end / clear) that
/// PerThreadTable invokes on its ContainerType.
struct InteropTableEntry {
  using ContainerTy = std::vector<omp_interop_val_t *>;
  using iterator = ContainerTy::iterator;

  ContainerTy Interops;

  /// Capacity reserved on the first insertion to avoid early reallocation.
  /// Kept as a static constant: a non-static const data member would delete
  /// the implicit copy/move assignment operators and occupy storage in every
  /// entry for a value that never varies.
  static constexpr int reservedEntriesPerThread = 20;

  /// Append \p obj, reserving the initial capacity if nothing is allocated.
  void add(omp_interop_val_t *obj) {
    if (Interops.capacity() == 0)
      Interops.reserve(reservedEntriesPerThread);
    Interops.push_back(obj);
  }

  /// Apply \p f to every stored pointer, then empty the container.
  /// PerThreadTable::clear resets its element count to zero right after
  /// calling this, so leaving the pointers in place would make size() and
  /// begin()/end() disagree and keep already-processed pointers reachable.
  template <class ClearFuncTy> void clear(ClearFuncTy f) {
    // Elements are passed as lvalues so callbacks taking a reference work.
    for (auto &Obj : Interops)
      f(Obj);
    Interops.clear();
  }

  /* vector interface */
  /// Number of stored pointers, narrowed to int as in the declared interface.
  int size() const { return static_cast<int>(Interops.size()); }
  iterator begin() { return Interops.begin(); }
  iterator end() { return Interops.end(); }
  iterator erase(iterator it) { return Interops.erase(it); }
};

/// Table of interop objects: a PerThreadTable that keeps omp_interop_val_t
/// pointers in one InteropTableEntry per thread.
struct InteropTblTy
: public PerThreadTable<InteropTableEntry, omp_interop_val_t *> {
// Declared here, defined elsewhere. This declaration hides the inherited
// PerThreadTable::clear(F) template for unqualified calls on InteropTblTy.
void clear();
};

#endif // OMPTARGET_OPENMP_INTEROP_API_H
51 changes: 29 additions & 22 deletions offload/include/OpenMP/omp.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,18 @@ typedef enum omp_interop_rc {
omp_irc_other = -6
} omp_interop_rc_t;

// Foreign runtime identifiers (omp_ifr_* spelling).
// NOTE(review): omp_foreign_runtime_id_t lists the same runtimes under
// omp_fr_* names; presumably only one of the two enums is meant to remain
// in the header — confirm.
typedef enum omp_interop_fr {
omp_ifr_cuda = 1,
omp_ifr_cuda_driver = 2,
omp_ifr_opencl = 3,
omp_ifr_sycl = 4,
omp_ifr_hip = 5,
omp_ifr_level_zero = 6,
omp_ifr_last = 7
} omp_interop_fr_t;
/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */
/* omp_fr_none (0) is the "no foreign runtime" value; omp_fr_last is one past
   the highest runtime id listed here. */
typedef enum omp_foreign_runtime_id_t {
omp_fr_none = 0,
omp_fr_cuda = 1,
omp_fr_cuda_driver = 2,
omp_fr_opencl = 3,
omp_fr_sycl = 4,
omp_fr_hip = 5,
omp_fr_level_zero = 6,
omp_fr_hsa = 7,
omp_fr_last = 8
} omp_foreign_runtime_id_t;

/* Handle type for interop objects; an untyped pointer at this API level. */
typedef void *omp_interop_t;

Expand Down Expand Up @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
/* Takes an interop handle and a return code and yields a C string;
   presumably a human-readable description of the return code — TODO confirm. */
extern const char *__KAI_KMPC_CONVENTION
omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);

// Backend type identifiers; only one value is defined.
// NOTE(review): presumably dropped together with the backend_type_id member
// of omp_interop_val_t — confirm before relying on it.
typedef enum omp_interop_backend_type_t {
// reserve 0
omp_interop_backend_type_cuda_1 = 1,
} omp_interop_backend_type_t;

// Foreign runtime identifiers with unprefixed enumerator names.
// NOTE(review): the numeric values match the omp_fr_* enumerators of
// omp_foreign_runtime_id_t; presumably this enum is superseded by it —
// confirm.
typedef enum omp_foreign_runtime_ids {
cuda = 1,
cuda_driver = 2,
opencl = 3,
sycl = 4,
hip = 5,
level_zero = 6,
} omp_foreign_runtime_ids_t;
/* Vendor defined values from OpenMP Additional Definitions document v2.1 */
/* omp_vendor_unknown is 0; omp_vendor_last is one past the highest vendor id
   listed here. */
typedef enum omp_vendor_id {
omp_vendor_unknown = 0,
omp_vendor_amd = 1,
omp_vendor_arm = 2,
omp_vendor_bsc = 3,
omp_vendor_fujitsu = 4,
omp_vendor_gnu = 5,
omp_vendor_hpe = 6,
omp_vendor_ibm = 7,
omp_vendor_intel = 8,
omp_vendor_llvm = 9,
omp_vendor_nec = 10,
omp_vendor_nvidia = 11,
omp_vendor_ti = 12,
omp_vendor_last = 13
} omp_vendor_id_t;

///} InteropAPI

Expand Down
109 changes: 109 additions & 0 deletions offload/include/PerThreadTable.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Table indexed with one entry per thread.
//
//===----------------------------------------------------------------------===//

#ifndef OFFLOAD_PERTHREADTABLE_H
#define OFFLOAD_PERTHREADTABLE_H

#include <list>
#include <memory>
#include <mutex>

// Using an STL container (such as std::vector) indexed by thread ID has
// too many race-condition issues, so we store each thread's entry in a
// thread_local variable.
// ContainerType is the container type used by each thread to store the
// objects, e.g., std::vector, std::set, etc. ObjectType is the type of the
// stored objects, e.g., omp_interop_val_t *, ...

/// Table that gives every thread its own lazily created container.
///
/// ContainerType has to provide `iterator`, `add(obj)`, `erase(it)`,
/// `begin()`, `end()` and `clear(f)`; ObjectType is the element type handed
/// to `add`. All accessors except clear() operate on the calling thread's
/// container only.
template <typename ContainerType, typename ObjectType> struct PerThreadTable {
  using iterator = typename ContainerType::iterator;

  /// State kept for one thread: an element count and the container itself,
  /// which is allocated on first use.
  struct PerThreadData {
    size_t NElements = 0;
    std::unique_ptr<ContainerType> ThEntry;
  };

  std::mutex Mtx; // held around every access to ThreadDataList
  std::list<PerThreadData *> ThreadDataList;

  // Default-constructible only; copying and moving are disabled.
  PerThreadTable() = default;
  PerThreadTable(const PerThreadTable &) = delete;
  PerThreadTable(PerThreadTable &&) = delete;
  PerThreadTable &operator=(const PerThreadTable &) = delete;
  PerThreadTable &operator=(PerThreadTable &&) = delete;
  ~PerThreadTable() {
    std::lock_guard<std::mutex> Guard(Mtx);
    ThreadDataList.clear();
  }

private:
  /// The calling thread's data record.
  // NOTE(review): the thread_local is a function-local static, so it exists
  // once per thread and per template specialization, not per table object —
  // confirm that only one table of each specialization is ever created.
  PerThreadData &getThreadData() {
    static thread_local PerThreadData LocalData;
    return LocalData;
  }

protected:
  /// The calling thread's container; created and registered in
  /// ThreadDataList on first use.
  // NOTE(review): the list stores the address of thread_local storage and
  // nothing here unregisters it when a thread exits — confirm the lifetime
  // assumptions with the callers.
  ContainerType &getThreadEntry() {
    PerThreadData &Local = getThreadData();
    if (!Local.ThEntry) {
      Local.ThEntry = std::make_unique<ContainerType>();
      std::lock_guard<std::mutex> Guard(Mtx);
      ThreadDataList.push_back(&Local);
    }
    return *Local.ThEntry;
  }

  /// Reference to the calling thread's element counter.
  size_t &getThreadNElements() { return getThreadData().NElements; }

public:
  /// Add \p obj to the calling thread's container and count it.
  void add(ObjectType obj) {
    ContainerType &Container = getThreadEntry();
    ++getThreadNElements();
    Container.add(obj);
  }

  /// Erase \p it from the calling thread's container and decrement the
  /// count; returns whatever the container's erase returns.
  iterator erase(iterator it) {
    ContainerType &Container = getThreadEntry();
    --getThreadNElements();
    return Container.erase(it);
  }

  /// Element count of the calling thread.
  size_t size() { return getThreadNElements(); }

  // Iterators over the objects owned by the calling thread.
  iterator begin() { return getThreadEntry().begin(); }
  iterator end() { return getThreadEntry().end(); }

  /// Call `clear(f)` on every registered thread's container, zero each
  /// thread's count, and then forget all registrations.
  template <class F> void clear(F f) {
    std::lock_guard<std::mutex> Guard(Mtx);
    for (PerThreadData *Registered : ThreadDataList) {
      Registered->ThEntry->clear(f);
      Registered->NElements = 0;
    }
    ThreadDataList.clear();
  }
};

#endif
7 changes: 6 additions & 1 deletion offload/include/PluginManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include <mutex>
#include <string>

#include "OpenMP/InteropAPI.h"

using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;

/// Struct for the data required to handle plugins
Expand Down Expand Up @@ -88,6 +90,9 @@ struct PluginManager {
HostPtrToTableMapTy HostPtrToTableMap;
std::mutex TblMapMtx; ///< For HostPtrToTableMap

/// Table of cached implicit interop objects
InteropTblTy InteropTbl;

// Work around for plugins that call dlopen on shared libraries that call
// tgt_register_lib during their initialisation. Stash the pointers in a
// vector until the plugins are all initialised and then register them.
Expand Down Expand Up @@ -185,5 +190,5 @@ void initRuntime();
void deinitRuntime();

extern PluginManager *PM;

extern std::atomic<bool> RTLAlive; // Indicates if the RTL has been initialized
#endif // OMPTARGET_PLUGIN_MANAGER_H
1 change: 1 addition & 0 deletions offload/include/Shared/APITypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct __tgt_device_image {
/// Three untyped handles describing a device; each defaults to nullptr.
// NOTE(review): presumably these are native context/device/platform handles
// filled in by a plugin — confirm against the code that populates them.
struct __tgt_device_info {
void *Context = nullptr;
void *Device = nullptr;
void *Platform = nullptr;
};

/// This struct is a record of all the host code that may be offloaded to a
Expand Down
Loading
Loading