Skip to content

Commit 69c3b9f

Browse files
[SYCL] Cherry-pick sanitizer patches (#19867)
This is a combined cherry-pick of: --- [DevSAN] Cache internal queue to avoid repeated create/destroy (#19540) Patch-By: Maosu Zhao <[email protected]> --- [DevTSAN] Move AllocInfo into DeviceInfo to support indirect access (#19634) If we maintain AllocInfo in ContextInfo, this will cause the pointer which is allocated by another context can't be poisoned. Patch-By: Maosu Zhao <[email protected]> --- [DevMSAN] Correctly apply stride for dest/src pointer when do async copy (#19766) Currently, we always define stride in elements when writing to destination pointer. However, according to spirv spec, the stride can only be applied to global ptr. Patch-By: Maosu Zhao <[email protected]>
1 parent cb5a0a5 commit 69c3b9f

File tree

11 files changed

+110
-57
lines changed

11 files changed

+110
-57
lines changed

libdevice/sanitizer/msan_rtl.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,11 +303,15 @@ inline void ReportError(const uint32_t size, const char __SYCL_CONSTANT__ *file,
303303

304304
// This function is only used for shadow propagation
305305
template <typename T>
306-
void GroupAsyncCopy(uptr Dest, uptr Src, size_t NumElements, size_t Stride) {
306+
void GroupAsyncCopy(uptr Dest, uptr Src, size_t NumElements, size_t Stride,
307+
bool StrideOnSrc) {
307308
auto DestPtr = (__SYCL_GLOBAL__ T *)Dest;
308309
auto SrcPtr = (const __SYCL_GLOBAL__ T *)Src;
309310
for (size_t i = 0; i < NumElements; i++) {
310-
DestPtr[i] = SrcPtr[i * Stride];
311+
if (StrideOnSrc)
312+
DestPtr[i] = SrcPtr[i * Stride];
313+
else
314+
DestPtr[i * Stride] = SrcPtr[i];
311315
}
312316
}
313317

@@ -748,16 +752,20 @@ __msan_unpoison_strided_copy(uptr dest, uint32_t dest_as, uptr src,
748752

749753
switch (element_size) {
750754
case 1:
751-
GroupAsyncCopy<int8_t>(shadow_dest, shadow_src, counts, stride);
755+
GroupAsyncCopy<int8_t>(shadow_dest, shadow_src, counts, stride,
756+
src_as == ADDRESS_SPACE_GLOBAL);
752757
break;
753758
case 2:
754-
GroupAsyncCopy<int16_t>(shadow_dest, shadow_src, counts, stride);
759+
GroupAsyncCopy<int16_t>(shadow_dest, shadow_src, counts, stride,
760+
src_as == ADDRESS_SPACE_GLOBAL);
755761
break;
756762
case 4:
757-
GroupAsyncCopy<int32_t>(shadow_dest, shadow_src, counts, stride);
763+
GroupAsyncCopy<int32_t>(shadow_dest, shadow_src, counts, stride,
764+
src_as == ADDRESS_SPACE_GLOBAL);
758765
break;
759766
case 8:
760-
GroupAsyncCopy<int64_t>(shadow_dest, shadow_src, counts, stride);
767+
GroupAsyncCopy<int64_t>(shadow_dest, shadow_src, counts, stride,
768+
src_as == ADDRESS_SPACE_GLOBAL);
761769
break;
762770
default:
763771
__spirv_ocl_printf(__msan_print_strided_copy_unsupport_type,

unified-runtime/source/loader/layers/sanitizer/asan/asan_buffer.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
9090
assert((void *)Device != nullptr && "Device cannot be nullptr");
9191

9292
std::scoped_lock<ur_shared_mutex> Guard(Mutex);
93+
auto CI = getAsanInterceptor()->getContextInfo(Context);
9394
auto &Allocation = Allocations[Device];
9495
ur_result_t URes = UR_RESULT_SUCCESS;
9596
if (!Allocation) {
@@ -106,9 +107,9 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
106107
}
107108

108109
if (HostPtr) {
109-
ManagedQueue Queue(Context, Device);
110+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
110111
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
111-
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
112+
InternalQueue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
112113
if (URes != UR_RESULT_SUCCESS) {
113114
UR_LOG_L(
114115
getContext()->logger, ERR,
@@ -147,10 +148,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
147148

148149
// Copy data from last synced device to host
149150
{
150-
ManagedQueue Queue(Context, LastSyncedDevice.hDevice);
151+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
151152
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
152-
Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, 0,
153-
nullptr, nullptr);
153+
InternalQueue, true, HostAllocation, LastSyncedDevice.MemHandle, Size,
154+
0, nullptr, nullptr);
154155
if (URes != UR_RESULT_SUCCESS) {
155156
UR_LOG_L(getContext()->logger, ERR,
156157
"Failed to migrate memory buffer data");
@@ -160,9 +161,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
160161

161162
// Sync data back to device
162163
{
163-
ManagedQueue Queue(Context, Device);
164+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
164165
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
165-
Queue, true, Allocation, HostAllocation, Size, 0, nullptr, nullptr);
166+
InternalQueue, true, Allocation, HostAllocation, Size, 0, nullptr,
167+
nullptr);
166168
if (URes != UR_RESULT_SUCCESS) {
167169
UR_LOG_L(getContext()->logger, ERR,
168170
"Failed to migrate memory buffer data");

unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate(
677677
std::shared_ptr<ContextInfo> CtxInfo =
678678
getAsanInterceptor()->getContextInfo(hContext);
679679
for (const auto &hDevice : CtxInfo->DeviceList) {
680-
ManagedQueue InternalQueue(hContext, hDevice);
680+
ur_queue_handle_t InternalQueue = CtxInfo->getInternalQueue(hDevice);
681681
char *Handle = nullptr;
682682
UR_CALL(pMemBuffer->getHandle(hDevice, Handle));
683683
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(

unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,17 +270,15 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
270270
auto ContextInfo = getContextInfo(Context);
271271
auto DeviceInfo = getDeviceInfo(Device);
272272

273-
ManagedQueue InternalQueue(Context, Device);
274-
if (!InternalQueue) {
275-
UR_LOG_L(getContext()->logger, ERR, "Failed to create internal queue");
276-
return UR_RESULT_ERROR_INVALID_QUEUE;
277-
}
273+
ur_queue_handle_t InternalQueue = ContextInfo->getInternalQueue(Device);
278274

279275
UR_CALL(prepareLaunch(ContextInfo, DeviceInfo, InternalQueue, Kernel,
280276
LaunchInfo));
281277

282278
UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue));
283279

280+
UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(InternalQueue));
281+
284282
return UR_RESULT_SUCCESS;
285283
}
286284

@@ -467,6 +465,7 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) {
467465

468466
ur_result_t AsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
469467
auto Context = GetContext(Program);
468+
auto CI = getContextInfo(Context);
470469
std::vector<ur_device_handle_t> Devices = GetDevices(Program);
471470

472471
for (auto Device : Devices) {
@@ -484,11 +483,11 @@ ur_result_t AsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
484483
assert((MetadataSize % sizeof(SpirKernelInfo) == 0) &&
485484
"SpirKernelMetadata size is not correct");
486485

487-
ManagedQueue Queue(Context, Device);
486+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
488487

489488
std::vector<SpirKernelInfo> SKInfo(NumOfSpirKernel);
490489
Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
491-
Queue, true, &SKInfo[0], MetadataPtr,
490+
InternalQueue, true, &SKInfo[0], MetadataPtr,
492491
sizeof(SpirKernelInfo) * NumOfSpirKernel, 0, nullptr, nullptr);
493492
if (Result != UR_RESULT_SUCCESS) {
494493
UR_LOG_L(getContext()->logger, ERR, "Can't read the value of <{}>: {}",
@@ -504,7 +503,7 @@ ur_result_t AsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
504503
}
505504
std::vector<char> KernelNameV(SKI.Size);
506505
Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
507-
Queue, true, KernelNameV.data(), (void *)SKI.KernelName,
506+
InternalQueue, true, KernelNameV.data(), (void *)SKI.KernelName,
508507
sizeof(char) * SKI.Size, 0, nullptr, nullptr);
509508
if (Result != UR_RESULT_SUCCESS) {
510509
UR_LOG_L(getContext()->logger, ERR, "Can't read kernel name: {}",
@@ -537,7 +536,7 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
537536
assert(ProgramInfo != nullptr && "unregistered program!");
538537

539538
for (auto Device : Devices) {
540-
ManagedQueue Queue(Context, Device);
539+
ur_queue_handle_t InternalQueue = ContextInfo->getInternalQueue(Device);
541540

542541
size_t MetadataSize;
543542
void *MetadataPtr;
@@ -554,7 +553,7 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
554553
"DeviceGlobal metadata size is not correct");
555554
std::vector<DeviceGlobalInfo> GVInfos(NumOfDeviceGlobal);
556555
Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
557-
Queue, true, &GVInfos[0], MetadataPtr,
556+
InternalQueue, true, &GVInfos[0], MetadataPtr,
558557
sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr);
559558
if (Result != UR_RESULT_SUCCESS) {
560559
UR_LOG_L(getContext()->logger, ERR, "Device Global[{}] Read Failed: {}",
@@ -932,6 +931,8 @@ bool ProgramInfo::isKernelInstrumented(ur_kernel_handle_t Kernel) const {
932931
ContextInfo::~ContextInfo() {
933932
Stats.Print(Handle);
934933

934+
InternalQueueMap.clear();
935+
935936
[[maybe_unused]] ur_result_t URes;
936937
if (USMPool) {
937938
URes = getContext()->urDdiTable.USM.pfnPoolRelease(USMPool);
@@ -971,6 +972,13 @@ ur_usm_pool_handle_t ContextInfo::getUSMPool() {
971972
return USMPool;
972973
}
973974

975+
ur_queue_handle_t ContextInfo::getInternalQueue(ur_device_handle_t Device) {
976+
std::scoped_lock<ur_shared_mutex> Guard(InternalQueueMapMutex);
977+
if (!InternalQueueMap[Device])
978+
InternalQueueMap[Device].emplace(Handle, Device);
979+
return *InternalQueueMap[Device];
980+
}
981+
974982
AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
975983
[[maybe_unused]] ur_result_t Result;
976984
if (Host.LocalArgs) {

unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "asan_statistics.hpp"
2121
#include "sanitizer_common/sanitizer_common.hpp"
2222
#include "sanitizer_common/sanitizer_options.hpp"
23+
#include "sanitizer_common/sanitizer_utils.hpp"
2324
#include "ur_sanitizer_layer.hpp"
2425

2526
#include <memory>
@@ -143,6 +144,10 @@ struct ContextInfo {
143144
std::vector<ur_device_handle_t> DeviceList;
144145
std::unordered_map<ur_device_handle_t, AllocInfoList> AllocInfosMap;
145146

147+
ur_shared_mutex InternalQueueMapMutex;
148+
std::unordered_map<ur_device_handle_t, std::optional<ManagedQueue>>
149+
InternalQueueMap;
150+
146151
AsanStatsWrapper Stats;
147152

148153
explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) {
@@ -163,6 +168,8 @@ struct ContextInfo {
163168
}
164169

165170
ur_usm_pool_handle_t getUSMPool();
171+
172+
ur_queue_handle_t getInternalQueue(ur_device_handle_t);
166173
};
167174

168175
struct AsanRuntimeDataWrapper {

unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ ur_usm_type_t GetUSMType(ur_context_handle_t Context, const void *MemPtr) {
3131
} // namespace
3232

3333
ManagedQueue::ManagedQueue(ur_context_handle_t Context,
34-
ur_device_handle_t Device) {
34+
ur_device_handle_t Device, bool IsOutOfOrder) {
35+
ur_queue_properties_t Prop{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
36+
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE};
3537
[[maybe_unused]] auto Result = getContext()->urDdiTable.Queue.pfnCreate(
36-
Context, Device, nullptr, &Handle);
38+
Context, Device, IsOutOfOrder ? &Prop : nullptr, &Handle);
3739
assert(Result == UR_RESULT_SUCCESS && "Failed to create ManagedQueue");
3840
UR_LOG_L(getContext()->logger, DEBUG, ">>> ManagedQueue {}", (void *)Handle);
3941
}

unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
namespace ur_sanitizer_layer {
2424

2525
struct ManagedQueue {
26-
ManagedQueue(ur_context_handle_t Context, ur_device_handle_t Device);
26+
ManagedQueue(ur_context_handle_t Context, ur_device_handle_t Device,
27+
bool IsOutOfOrder = false);
2728
~ManagedQueue();
2829

2930
// Disable copy semantics

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
9999

100100
std::scoped_lock<ur_shared_mutex> Guard(Mutex);
101101
auto &Allocation = Allocations[Device];
102+
auto CI = getTsanInterceptor()->getContextInfo(Context);
102103
ur_result_t URes = UR_RESULT_SUCCESS;
103104
if (!Allocation) {
104105
ur_usm_desc_t USMDesc{};
@@ -114,7 +115,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
114115
}
115116

116117
if (HostPtr) {
117-
ManagedQueue Queue(Context, Device);
118+
ur_queue_handle_t Queue = CI->getInternalQueue(Device);
118119
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
119120
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
120121
if (URes != UR_RESULT_SUCCESS) {
@@ -155,7 +156,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
155156

156157
// Copy data from last synced device to host
157158
{
158-
ManagedQueue Queue(Context, LastSyncedDevice.hDevice);
159+
ur_queue_handle_t Queue = CI->getInternalQueue(Device);
159160
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
160161
Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, 0,
161162
nullptr, nullptr);
@@ -168,7 +169,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
168169

169170
// Sync data back to device
170171
{
171-
ManagedQueue Queue(Context, Device);
172+
ur_queue_handle_t Queue = CI->getInternalQueue(Device);
172173
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
173174
Queue, true, Allocation, HostAllocation, Size, 0, nullptr, nullptr);
174175
if (URes != UR_RESULT_SUCCESS) {
@@ -185,8 +186,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
185186
}
186187

187188
ur_result_t MemBuffer::free() {
188-
for (const auto &[_, Ptr] : Allocations) {
189-
ur_result_t URes = getTsanInterceptor()->releaseMemory(Context, Ptr);
189+
for (const auto &[Device, Ptr] : Allocations) {
190+
ur_result_t URes = Device
191+
? getTsanInterceptor()->releaseMemory(Context, Ptr)
192+
: getContext()->urDdiTable.USM.pfnFree(Context, Ptr);
190193
if (URes != UR_RESULT_SUCCESS) {
191194
UR_LOG_L(getContext()->logger, ERR, "Failed to free buffer handle {}",
192195
(void *)Ptr);

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ ur_result_t urMemBufferCreate(
402402
std::shared_ptr<ContextInfo> CtxInfo =
403403
getTsanInterceptor()->getContextInfo(hContext);
404404
for (const auto &hDevice : CtxInfo->DeviceList) {
405-
ManagedQueue InternalQueue(hContext, hDevice);
405+
ur_queue_handle_t InternalQueue = CtxInfo->getInternalQueue(hDevice);
406406
char *Handle = nullptr;
407407
UR_CALL(pMemBuffer->getHandle(hDevice, Handle));
408408
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(

0 commit comments

Comments
 (0)