From 8e0e77af595780afbec813bee2a6f3006e927396 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 22 May 2025 10:16:16 +0200 Subject: [PATCH 01/26] wip --- clang/lib/Driver/SanitizerArgs.cpp | 3 + libdevice/sanitizer/msan_rtl.cpp | 41 ++++++--- .../Instrumentation/MemorySanitizer.cpp | 88 ++++++------------- unified-runtime/source/loader/CMakeLists.txt | 1 + .../layers/sanitizer/msan/msan_shadow.cpp | 10 +-- .../layers/sanitizer/msan/msan_shadow.hpp | 22 +++-- .../sanitizer_common/sanitizer_allocator.cpp | 69 +++++++++++++++ .../sanitizer_common/sanitizer_allocator.hpp | 13 +++ 8 files changed, 162 insertions(+), 85 deletions(-) create mode 100644 unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 3c9a4a2471293..af1fd8b14bdd0 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -1279,6 +1279,9 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-msan-poison-stack-with-call=1"); + + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-msan-track-origins=1"); } else if (Sanitizers.has(SanitizerKind::Thread)) { CmdArgs.push_back("-fsanitize=thread"); // The tsan function entry/exit builtins are used to record stack diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 864a6c0c2c4af..179969c38c5e0 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -83,7 +83,7 @@ void __msan_internal_report_save(const uint32_t size, const char __SYCL_CONSTANT__ *file, const uint32_t line, const char __SYCL_CONSTANT__ *func, - const uptr origin) { + const uint32_t origin) { const int Expected = MSAN_REPORT_NONE; int Desired = MSAN_REPORT_START; @@ -136,7 +136,8 @@ void __msan_internal_report_save(const uint32_t size, void __msan_report_error(const uint32_t size, const char 
__SYCL_CONSTANT__ *file, const uint32_t line, - const char __SYCL_CONSTANT__ *func, uptr origin = 0) { + const char __SYCL_CONSTANT__ *func, + uint32_t origin = 0) { __msan_internal_report_save(size, file, line, func, origin); } @@ -167,15 +168,14 @@ inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) { ConvertGenericPointer(addr, as); } - // Device USM only - if (as == ADDRESS_SPACE_GLOBAL && (addr & PVC_DEVICE_USM_MASK)) { - auto shadow_begin = GetMsanLaunchInfo->GlobalShadowOffset; - auto shadow_end = GetMsanLaunchInfo->GlobalShadowOffsetEnd; - if (addr < shadow_begin) { - return addr + (shadow_begin - PVC_DEVICE_USM_BEGIN); - } else { - return addr - (PVC_DEVICE_USM_END - shadow_end + 1); + if (as == ADDRESS_SPACE_GLOBAL) { + if (addr >> 52 == 0xff0) { + return addr - 0x5000'0000'0000ULL; } + // auto shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; + // return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + + // shadow_base; + return GetMsanLaunchInfo->CleanShadow; } else if (as == ADDRESS_SPACE_LOCAL) { const auto shadow_offset = GetMsanLaunchInfo->LocalShadowOffset; if (shadow_offset != 0) { @@ -223,7 +223,7 @@ inline void __msan_exit() { #define MSAN_MAYBE_WARNING(type, size) \ DEVICE_EXTERN_C_NOINLINE void __msan_maybe_warning_##size( \ - type s, uptr o, const char __SYCL_CONSTANT__ *file, uint32_t line, \ + type s, uint32_t o, const char __SYCL_CONSTANT__ *file, uint32_t line, \ const char __SYCL_CONSTANT__ *func) { \ if (!GetMsanLaunchInfo) \ return; \ @@ -291,6 +291,25 @@ __msan_get_shadow(uptr addr, uint32_t as, return (__SYCL_GLOBAL__ void *)shadow_ptr; } +// For mapping detail, ref to +// "unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp" +DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, + uint32_t as) { + // Return clean shadow (0s) by default + uptr origin_ptr = GetMsanLaunchInfo->CleanShadow; + + if (!GetMsanLaunchInfo) + return (__SYCL_GLOBAL__ void *)origin_ptr; + + if (as 
== ADDRESS_SPACE_GLOBAL) { + if (addr >> 52 == 0xff0) { + origin_ptr = addr - 0xa000'0000'0000ULL; + } + } + + return (__SYCL_GLOBAL__ void *)origin_ptr; +} + static __SYCL_CONSTANT__ const char __msan_print_memset[] = "[kernel] memset(beg=%p, shadow_beg=%p, shadow_end=%p)\n"; diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 2f40cf7479c15..1f3f85da2d299 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -732,6 +732,7 @@ class MemorySanitizer { /// Get shadow memory address FunctionCallee MsanGetShadowFn; + FunctionCallee MsanGetOriginFn; /// Storage for return values of the MsanMetadataPtrXxx functions. Value *MsanMetadataAlloca; @@ -1485,14 +1486,14 @@ void MemorySanitizer::createUserspaceApi(Module &M, } else { // SPIR or SPIR-V // __msan_maybe_warning_N( // intN_t status, - // uptr origin, // possible shadow address of status + // int origin, // char* file, // unsigned int line, // char* func // ) MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false), - IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IntptrTy, + IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS), IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS)); } @@ -1567,7 +1568,11 @@ void MemorySanitizer::initializeCallbacks(Module &M, MsanGetShadowFn = M.getOrInsertFunction( "__msan_get_shadow", PointerType::get(*C, kSpirOffloadGlobalAS), IntptrTy, - IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS)); + IRB.getInt32Ty()); + + MsanGetOriginFn = M.getOrInsertFunction( + "__msan_get_origin", PointerType::get(*C, kSpirOffloadGlobalAS), IntptrTy, + IRB.getInt32Ty()); if (CompileKernel) { createKernelApi(M, TLI); @@ -2180,37 +2185,7 @@ struct MemorySanitizerVisitor : public InstVisitor { } else { // SPIR or SPIR-V 
// Pass the pointer of shadow memory to the report function SmallVector Args = {ConvertedShadow2}; - - if (ClSpirOffloadDebug) { - // Attempt to get the shadow memory - if (auto *LoadShadow = dyn_cast(ConvertedShadow)) { - Args.emplace_back(IRB.CreatePointerCast( - LoadShadow->getPointerOperand(), MS.IntptrTy)); - } else if (auto *BinaryOp = - dyn_cast(ConvertedShadow)) { - Value *LastOperand = nullptr; - do { - LastOperand = BinaryOp->getOperand(0); - // TODO: assert second operand is 0 - BinaryOp = dyn_cast(LastOperand); - } while (BinaryOp && BinaryOp->getOpcode() == Instruction::Or); - - if (auto *LoadShadow = dyn_cast(LastOperand)) { - Args.emplace_back(IRB.CreatePointerCast( - LoadShadow->getPointerOperand(), MS.IntptrTy)); - } - } else if (auto *Trunc = dyn_cast(ConvertedShadow)) { - if (auto *LoadShadow = dyn_cast(Trunc->getOperand(0))) { - Args.emplace_back(IRB.CreatePointerCast( - LoadShadow->getPointerOperand(), MS.IntptrTy)); - } - } - } - - if (Args.size() == 1) { - Args.emplace_back(ConstantInt::get(MS.IntptrTy, 0)); - } - + Args.emplace_back(MS.TrackOrigins && Origin ? 
Origin : (Value *)IRB.getInt32(0)); appendDebugInfoToArgs(IRB, Args); CallBase *CB = IRB.CreateCall(Fn, Args); @@ -2526,29 +2501,10 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) { Type *IntptrTy = ptrToIntPtrType(Addr->getType()); Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy); - - if (!SpirOrSpirv) { - if (uint64_t AndMask = MS.MapParams->AndMask) - OffsetLong = - IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask)); - - if (uint64_t XorMask = MS.MapParams->XorMask) - OffsetLong = - IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask)); - } else { // SPIR or SPIR-V - auto *ConstASPtrTy = - PointerType::get(Addr->getContext(), kSpirOffloadConstantAS); - auto *FuncNameGV = MS.Spirv.getOrCreateGlobalString( - "__msan_func", F.getName(), kSpirOffloadConstantAS); - - OffsetLong = IRB.CreateCall( - MS.MsanGetShadowFn, - {OffsetLong, IRB.getInt32(Addr->getType()->getPointerAddressSpace()), - ClSpirOffloadDebug - ? 
ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy) - : ConstantPointerNull::get(ConstASPtrTy)}); - } - + if (uint64_t AndMask = MS.MapParams->AndMask) + OffsetLong = IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask)); + if (uint64_t XorMask = MS.MapParams->XorMask) + OffsetLong = IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask)); return OffsetLong; } @@ -2563,6 +2519,20 @@ struct MemorySanitizerVisitor : public InstVisitor { std::pair getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy, MaybeAlign Alignment) { + if (SpirOrSpirv) { + unsigned int AS = Addr->getType()->getPointerAddressSpace(); + Type *IntptrTy = ptrToIntPtrType(Addr->getType()); + Value *ShadowLong = IRB.CreatePointerCast(Addr, IntptrTy); + Value *ShadowPtr = + IRB.CreateCall(MS.MsanGetShadowFn, {ShadowLong, IRB.getInt32(AS)}); + Value *OriginPtr = nullptr; + if (MS.TrackOrigins) { + OriginPtr = + IRB.CreateCall(MS.MsanGetOriginFn, {ShadowLong, IRB.getInt32(AS)}); + } + return std::make_pair(ShadowPtr, OriginPtr); + } + VectorType *VectTy = dyn_cast(Addr->getType()); if (!VectTy) { assert(Addr->getType()->isPointerTy()); @@ -2577,9 +2547,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreateAdd(ShadowLong, constToIntPtr(IntptrTy, ShadowBase)); } Value *ShadowPtr = IRB.CreateIntToPtr( - ShadowLong, - getPtrToShadowPtrType(IntptrTy, ShadowTy, - SpirOrSpirv ? 
kSpirOffloadGlobalAS : 0)); + ShadowLong, getPtrToShadowPtrType(IntptrTy, ShadowTy)); Value *OriginPtr = nullptr; if (MS.TrackOrigins) { diff --git a/unified-runtime/source/loader/CMakeLists.txt b/unified-runtime/source/loader/CMakeLists.txt index 5b6d9dbfbadcf..437f1b6a761f5 100644 --- a/unified-runtime/source/loader/CMakeLists.txt +++ b/unified-runtime/source/loader/CMakeLists.txt @@ -174,6 +174,7 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_common.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index d5a263de255d0..18c934c17d637 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -392,12 +392,12 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, } uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { - assert(MsanShadowMemoryPVC::IsDeviceUSM(Ptr) && "Ptr must be device USM"); - if (Ptr < ShadowBegin) { - return Ptr + (ShadowBegin - 0xff00'0000'0000'0000ULL); - } else { - return Ptr - (0xff00'ffff'ffff'ffffULL - ShadowEnd + 1); + if (MsanShadowMemoryPVC::IsDeviceUSM(Ptr)) { + return Ptr - 0x5000'0000'0000ULL; } + // host/shared USM + return (Ptr & 0xff'ffff'ffffULL) + ((Ptr & 0x8000'0000'0000ULL) >> 7) + + ShadowBegin; } uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) { 
diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp index c3aa931abbde1..701c3a878f725 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -158,17 +158,21 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { /// Shadow Memory layout of GPU PVC device /// /// USM Allocation Range (56 bits) -/// Host USM : 0x0000_0000_0000_0000 ~ 0x00ff_ffff_ffff_ffff -/// Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff +/// Host USM : 0x00ff_ff00_0000_0000 ~ 0x00ff_ffff_ffff_ffff +/// Shared USM : 0x0000_7f00_0000_0000 ~ 0x0000_7fff_ffff_ffff /// DeviceĀ USM : 0xff00_0000_0000_0000 ~ 0xff00_ffff_ffff_ffff /// /// Shadow Memory Mapping -/// We support device USM only, because it's hard to do shadow propagation on host/shared USM which can be accessed by host code. -/// MSan needs to reserve half of device USM as shadow memory and does 1:1 mapping. 
-/// 0xff00_0000_0000_0000 - MSAN_SHADOW_BASE : "app-1" -/// MSAN_SHADOW_BASE - MSAN_SHADOW_END1 : "shadow-1" (MSAN_SHADOW_END1 - MSAN_SHADOW_BASE == MSAN_SHADOW_BASE - 0xff00_0000_0000_0000) -/// MSAN_SHADOW_END1 - MSAN_SHADOW_END2 : "shadow-2" (MSAN_SHADOW_END2 - MSAN_SHADOW_END1 == 0xff01_0000_0000_0000 - MSAN_SHADOW_END2) -/// MSAN_SHADOW_END2 - 0xff01_0000_0000_0000 : "app-2" +/// 0xff00_0000_0000_0000 - MSAN_SHADOW_BASE : "invalid" +/// MSAN_SHADOW_BASE - MSAN_SHADOW_END1 : "shadow-1" (MSAN_SHADOW_END1 - MSAN_SHADOW_BASE = 0x0200_0000_0000) +/// MSAN_SHADOW_END1 - MSAN_SHADOW_END2 : "origin-1" (MSAN_SHADOW_END2 - MSAN_SHADOW_END1 = 0x0200_0000_0000) +/// (gap) +/// MSAN_SHADOW_END3 - MSAN_SHADOW_END4 : "origin-2" (MSAN_SHADOW_END4 - MSAN_SHADOW_END3 = 0x5000_0000_0000) +/// MSAN_SHADOW_END4 - MSAN_SHADOW_END5 : "shadow-2" (MSAN_SHADOW_END5 - MSAN_SHADOW_END4 = 0x5000_0000_0000) +/// MSAN_SHADOW_END5 - 0xff00_ffff_ffff_ffff : "app" (MSAN_SHADOW_END5 - MSAN_SHADOW_BASE = 0xB400_0000_0000) +/// +/// Here, "shadow-1" and "origin-1" are used for host/shared USM, "shadow-2" and "origin-2" are used for device USM, and "app" is device USM. +/// The size of "app" is less than 0x5000_0000_0000. We assume "invalid" is not usable by the user application. 
// clang-format on struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU { MsanShadowMemoryPVC(ur_context_handle_t Context, ur_device_handle_t Device) @@ -178,7 +182,7 @@ struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU { uptr MemToShadow(uptr Ptr) override; - size_t GetShadowSize() override { return 0x8000'0000'0000ULL; } + size_t GetShadowSize() override { return 0xb400'0000'0000ULL; } uptr GetStartAddress() override { return 0x100'0000'0000'0000ULL; } }; diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp new file mode 100644 index 0000000000000..7ff08a8f53e3c --- /dev/null +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp @@ -0,0 +1,69 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file sanitizer_allocator.cpp + * + */ + +#include "sanitizer_allocator.hpp" +#include "sanitizer_libdevice.hpp" +#include "sanitizer_utils.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { + +namespace { +void validate(uptr Allocated, AllocType AllocType, DeviceType DeviceType) { + if (DeviceType == DeviceType::GPU_PVC) { + switch (AllocType) { + case AllocType::DEVICE_USM: + assert((Allocated >> 52) == 0xff0); + break; + case AllocType::HOST_USM: + assert((Allocated >> 40) == 0xffff); + break; + case AllocType::SHARED_USM: + assert((Allocated >> 40) == 0x7f); + break; + default: + return; + } + } +} +} // namespace + +void *Allocator::allocate(uptr Size, const ur_usm_desc_t *Properties, + AllocType Type) { + void *Allocated = nullptr; + ur_result_t Result; + ur_usm_pool_handle_t Pool = nullptr; + + if (Type == AllocType::DEVICE_USM) { + Result = getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, Properties, Pool, Size, &Allocated); + } else if (Type == AllocType::HOST_USM) { + Result = getContext()->urDdiTable.USM.pfnHostAlloc(Context, Properties, + Pool, Size, &Allocated); + } else if (Type == AllocType::SHARED_USM) { + Result = getContext()->urDdiTable.USM.pfnSharedAlloc( + Context, Device, Properties, Pool, Size, &Allocated); + } else { + return nullptr; + } + + if (Result != UR_RESULT_SUCCESS) { + return nullptr; + } + + validate((uptr)Allocated, Type, GetDeviceType(Context, Device)); + + return Allocated; +} + +} // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp index 29d2ef8572874..09034788a1c36 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp +++ 
b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp @@ -13,6 +13,8 @@ #pragma once +#include "sanitizer_common.hpp" + namespace ur_sanitizer_layer { enum class AllocType { @@ -41,4 +43,15 @@ inline const char *ToString(AllocType Type) { } } +class Allocator { +public: + Allocator(); + + void *allocate(uptr Size, const ur_usm_desc_t *Properties, AllocType Type); + +private: + ur_context_handle_t Context; + ur_device_handle_t Device; +}; + } // namespace ur_sanitizer_layer From c36c53aef2edb4f74527827ee6dddb055f30460f Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 22 May 2025 10:16:31 +0200 Subject: [PATCH 02/26] wip --- sycl/test-e2e/MemorySanitizer/check_call.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sycl/test-e2e/MemorySanitizer/check_call.cpp b/sycl/test-e2e/MemorySanitizer/check_call.cpp index aa2a608027876..d7d5aea09cd7d 100644 --- a/sycl/test-e2e/MemorySanitizer/check_call.cpp +++ b/sycl/test-e2e/MemorySanitizer/check_call.cpp @@ -9,17 +9,14 @@ #include #include -__attribute__((noinline)) long long foo(int data1, long long data2) { - return data1 + data2; -} +__attribute__((noinline)) int foo(int data1) { return data1; } int main() { sycl::queue Q; auto *array = sycl::malloc_device(2, Q); Q.submit([&](sycl::handler &h) { - h.single_task( - [=]() { array[0] = foo(array[0], array[1]); }); + h.single_task([=]() { array[0] = foo(array[1]); }); }); Q.wait(); // CHECK-NOT: [kernel] From 68cf62832d6217c1b686e6832c7c8c907ccd67df Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 28 May 2025 09:40:57 +0200 Subject: [PATCH 03/26] wip --- clang/lib/Driver/SanitizerArgs.cpp | 2 + libdevice/sanitizer/msan_rtl.cpp | 30 +- .../Instrumentation/MemorySanitizer.cpp | 9 +- unified-runtime/source/loader/CMakeLists.txt | 28 +- .../sanitizer/msan/msan_interceptor.cpp | 60 +++- .../layers/sanitizer/msan/msan_origin.cpp | 19 ++ .../layers/sanitizer/msan/msan_origin.hpp | 256 ++++++++++++++++++ 
.../layers/sanitizer/msan/msan_report.cpp | 25 +- .../layers/sanitizer/msan/msan_shadow.cpp | 108 ++++++-- .../layers/sanitizer/msan/msan_shadow.hpp | 41 ++- .../sanitizer_common/sanitizer_stackdepot.cpp | 49 ++++ .../sanitizer_common/sanitizer_stackdepot.hpp | 23 ++ 12 files changed, 566 insertions(+), 84 deletions(-) create mode 100644 unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp create mode 100644 unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp create mode 100644 unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp create mode 100644 unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index af1fd8b14bdd0..aee3d1b326794 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -1282,6 +1282,8 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-msan-track-origins=1"); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-msan-print-stack-names=0"); } else if (Sanitizers.has(SanitizerKind::Thread)) { CmdArgs.push_back("-fsanitize=thread"); // The tsan function entry/exit builtins are used to record stack diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 179969c38c5e0..db220f7177ee3 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -169,12 +169,13 @@ inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) { } if (as == ADDRESS_SPACE_GLOBAL) { - if (addr >> 52 == 0xff0) { + if (addr >> 52 == 0xff0) { // device USM return addr - 0x5000'0000'0000ULL; } - // auto shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; - // return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + - // shadow_base; + // host/shared USM + auto shadow_base = 
GetMsanLaunchInfo->GlobalShadowOffset; + return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + + shadow_base; return GetMsanLaunchInfo->CleanShadow; } else if (as == ADDRESS_SPACE_LOCAL) { const auto shadow_offset = GetMsanLaunchInfo->LocalShadowOffset; @@ -302,9 +303,14 @@ DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, return (__SYCL_GLOBAL__ void *)origin_ptr; if (as == ADDRESS_SPACE_GLOBAL) { - if (addr >> 52 == 0xff0) { - origin_ptr = addr - 0xa000'0000'0000ULL; + if (addr >> 52 == 0xff0) { // device USM + return (__SYCL_GLOBAL__ void *)(addr - 0xa000'0000'0000ULL); } + // host/shared USM + uptr shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; + return (__SYCL_GLOBAL__ void *)((addr & 0xff'ffff'ffffULL) + + ((addr & 0x8000'0000'0000ULL) >> 7) + + shadow_base + 0x0200'0000'0000ULL); } return (__SYCL_GLOBAL__ void *)origin_ptr; @@ -609,4 +615,16 @@ __msan_set_private_base(__SYCL_PRIVATE__ void *ptr) { MSAN_DEBUG(__spirv_ocl_printf(__msan_print_private_base, sid, ptr)); } +DEVICE_EXTERN_C_NOINLINE void +__msan_set_alloca_origin_no_descr(void *a, uptr size, + __SYCL_GLOBAL__ u32 *id_ptr) { + // SetAllocaOrigin(a, size, id_ptr, nullptr, GET_CALLER_PC()); +} + +DEVICE_EXTERN_C_NOINLINE void +__msan_set_alloca_origin_with_descr(void *a, uptr size, + __SYCL_GLOBAL__ u32 *id_ptr, char *descr) { + // SetAllocaOrigin(a, size, id_ptr, descr, GET_CALLER_PC()); +} + #endif // __SPIR__ || __SPIRV__ diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 1f3f85da2d299..70e695594c2f5 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1508,9 +1508,10 @@ void MemorySanitizer::createUserspaceApi(Module &M, MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction("__msan_set_alloca_origin_with_descr", IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy, PtrTy); - 
MsanSetAllocaOriginNoDescriptionFn = - M.getOrInsertFunction("__msan_set_alloca_origin_no_descr", - IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy); + MsanSetAllocaOriginNoDescriptionFn = M.getOrInsertFunction( + "__msan_set_alloca_origin_no_descr", IRB.getVoidTy(), PtrTy, IntptrTy, + TargetTriple.isSPIROrSPIRV() ? PointerType::get(*C, kSpirOffloadGlobalAS) + : PtrTy); MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(), PtrTy, IntptrTy); } @@ -6472,7 +6473,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *getLocalVarIdptr(AllocaInst &I) { ConstantInt *IntConst = - ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0); + ConstantInt::get(Type::getInt32Ty(I.getContext()), 0); return new GlobalVariable(*F.getParent(), IntConst->getType(), /*isConstant=*/false, GlobalValue::PrivateLinkage, IntConst); diff --git a/unified-runtime/source/loader/CMakeLists.txt b/unified-runtime/source/loader/CMakeLists.txt index 437f1b6a761f5..bcb0649025538 100644 --- a/unified-runtime/source/loader/CMakeLists.txt +++ b/unified-runtime/source/loader/CMakeLists.txt @@ -156,10 +156,26 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_libdevice.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_origin.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_origin.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_common.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_options.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_options.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_ddi.cpp @@ -171,18 +187,6 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_report.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_shadow.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_shadow.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_common.hpp - 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_options.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_options.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index c4d5bee40342f..b5e5286028d82 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -14,6 +14,7 @@ #include "msan_interceptor.hpp" #include "msan_ddi.hpp" +#include "msan_origin.hpp" #include "msan_report.hpp" #include "msan_shadow.hpp" #include "sanitizer_common/sanitizer_stacktrace.hpp" @@ -53,17 +54,34 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, std::shared_ptr DeviceInfo = Device ? getDeviceInfo(Device) : nullptr; + // Origin tracking needs an alignment of at least 4 + constexpr uint32_t MSAN_ORIGIN_TRACKING_GRANULARITY = 4; + + uint32_t Alignment = Properties ? 
Properties->align : 4; + // Alignment must be zero or a power-of-two + if (0 != (Alignment & (Alignment - 1))) { + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + if (Alignment < MSAN_ORIGIN_TRACKING_GRANULARITY) { + Alignment = MSAN_ORIGIN_TRACKING_GRANULARITY; + } + uptr RoundedSize = RoundUpTo(Size, Alignment); + void *Allocated = nullptr; if (Type == AllocType::DEVICE_USM) { UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( - Context, Device, Properties, Pool, Size, &Allocated)); + Context, Device, Properties, Pool, RoundedSize, &Allocated)); } else if (Type == AllocType::HOST_USM) { UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc(Context, Properties, Pool, - Size, &Allocated)); + RoundedSize, &Allocated)); } else if (Type == AllocType::SHARED_USM) { UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, Properties, Pool, Size, &Allocated)); + Context, Device, Properties, Pool, RoundedSize, &Allocated)); + } else { + UR_LOG_L(getContext()->logger, ERR, "Unsupported allocation type: {}", + ToString(Type)); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } *ResultPtr = Allocated; @@ -78,14 +96,9 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, } assert(Device); - auto AI = std::make_shared(MsanAllocInfo{(uptr)Allocated, - Size, - false, - Context, - Device, - GetCurrentBacktrace(), - {}}); - + StackTrace Stack = GetCurrentBacktrace(); + auto AI = std::make_shared( + MsanAllocInfo{(uptr)Allocated, Size, false, Context, Device, Stack, {}}); AI->print(); // For memory release @@ -94,10 +107,27 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, m_AllocationMap.emplace(AI->AllocBegin, AI); } + HeapType HeapType; + switch (Type) { + case AllocType::DEVICE_USM: + HeapType = HeapType::DeviceUSM; + break; + case AllocType::HOST_USM: + HeapType = HeapType::HostUSM; + break; + case AllocType::SHARED_USM: + HeapType = HeapType::SharedUSM; + break; + default: + assert(false); + } + + Origin HeapOrigin = 
Origin::CreateHeapOrigin(Stack, HeapType); + // Update shadow memory ManagedQueue Queue(Context, Device); - DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, AI->AllocSize, - 0xff); + DeviceInfo->Shadow->EnqueuePoisonShadowWithOrigin( + Queue, AI->AllocBegin, AI->AllocSize, 0xff, HeapOrigin.raw_id()); return UR_RESULT_SUCCESS; } @@ -286,9 +316,9 @@ MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { // Only support device global USM if (DeviceInfo->Type == DeviceType::CPU || (DeviceInfo->Type == DeviceType::GPU_PVC && - MsanShadowMemoryPVC::IsDeviceUSM(GVInfo.Addr)) || + MsanShadowMemoryPVC::isDeviceUSM(GVInfo.Addr)) || (DeviceInfo->Type == DeviceType::GPU_DG2 && - MsanShadowMemoryDG2::IsDeviceUSM(GVInfo.Addr))) { + MsanShadowMemoryDG2::isDeviceUSM(GVInfo.Addr))) { UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr, GVInfo.Size, 0)); ContextInfo->CleanShadowSize = diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp new file mode 100644 index 0000000000000..ef0a0dfd1fff5 --- /dev/null +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp @@ -0,0 +1,19 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_origin.cpp + * + */ + +#include "msan_origin.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { +namespace msan {} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp new file mode 100644 index 0000000000000..2420cbff6ade7 --- /dev/null +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp @@ -0,0 +1,256 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. See LICENSE.TXT + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_origin.hpp + * + */ + +#pragma once + +#include "sanitizer_common/sanitizer_stackdepot.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" + +#include +#include + +namespace ur_sanitizer_layer { +namespace msan { + +// Origin handling. 
+// +// 10xx xxxx xxxx xxxx device USM +// 110x xxxx xxxx xxxx host USM +// 1110 xxxx xxxx xxxx shared USM +// 1111 xxxx xxxx xxxx local memory +// 0000 xxxx xxxx xxxx private memory +// 0zzz xxxx xxxx xxxx chained +// +enum class HeapType { DeviceUSM, HostUSM, SharedUSM, Local }; + +inline const char *ToString(HeapType Type) { + switch (Type) { + case HeapType::DeviceUSM: + return "Device USM"; + case HeapType::HostUSM: + return "Host USM"; + case HeapType::SharedUSM: + return "Shared USM"; + case HeapType::Local: + return "Local Memory"; + default: + return "Unknown Heap Type"; + } +} + +class Origin { +public: + // static bool isValidId(uint32_t id) { return id != 0 && id != + // (uint32_t)-1; } + + uint32_t raw_id() const { return raw_id_; } + + bool isHeapOrigin() const { + return isDeviceUSMOrigin() || isHostUSMOrigin() || isSharedUSMOrigin() || + isLocalOrigin(); + } + + HeapType getHeapType() const { + if (isDeviceUSMOrigin()) + return HeapType::DeviceUSM; + if (isHostUSMOrigin()) + return HeapType::HostUSM; + if (isSharedUSMOrigin()) + return HeapType::SharedUSM; + if (isLocalOrigin()) + return HeapType::Local; + + assert(false && "Unknown heap type"); + return HeapType::DeviceUSM; // Default fallback, should never reach here + } + + uint32_t getHeapId() const { + switch (getHeapType()) { + case HeapType::DeviceUSM: + return getDeviceUSMId(); + case HeapType::HostUSM: + return getHostUSMId(); + case HeapType::SharedUSM: + return getSharedUSMId(); + case HeapType::Local: + return getLocalId(); + default: + assert(false && "Unknown heap type"); + return 0; + } + } + + bool isDeviceUSMOrigin() const { + // 10xx xxxx xxxx xxxx + return raw_id_ >> kDeviceUSMShift == kDeviceUSMBits; + } + bool isHostUSMOrigin() const { + // 110x xxxx xxxx xxxx + return raw_id_ >> kHostUSMShift == kHostUSMBits; + } + bool isSharedUSMOrigin() const { + // 1110 xxxx xxxx xxxx + return raw_id_ >> kSharedUSMShift == kSharedUSMBits; + } + + bool isLocalOrigin() const { + // 1111 xxxx 
xxxx xxxx + return raw_id_ >> kLocalShift == kLocalBits; + } + + bool isPrivateOrigin() const { + // 0000 xxxx xxxx xxxx + return (raw_id_ >> kDepthShift) == (1 << kDepthBits); + } + + bool isChainedOrigin() const { + // 0zzz xxxx xxxx xxxx, zzz != 000 + return (raw_id_ >> kDepthShift) > (1 << kDepthBits); + } + + uint32_t getDeviceUSMId() const { + assert(isDeviceUSMOrigin()); + return raw_id_ & kDeviceUSMIdMask; + } + + uint32_t getHostUSMId() const { + assert(isHostUSMOrigin()); + return raw_id_ & kHostUSMIdMask; + } + + uint32_t getSharedUSMId() const { + assert(isSharedUSMOrigin()); + return raw_id_ & kSharedUSMIdMask; + } + + uint32_t getLocalId() const { + assert(isLocalOrigin()); + return raw_id_ & kLocalIdMask; + } + + uint32_t getPrivateId() const { + assert(isPrivateOrigin()); + return raw_id_ & kChainedIdMask; + } + + uint32_t getChainedId() const { + assert(isChainedOrigin()); + return raw_id_ & kChainedIdMask; + } + + // Returns the next origin in the chain and the current stack trace. 
+ // Origin getNextChainedOrigin(StackTrace *stack) const { + // assert(isChainedOrigin()); + // uint32_t prev_id; + // uint32_t StackId = ChainedOriginDepotGet(getChainedId(), &prev_id); + // if (stack) + // *stack = StackDepotGet(StackId); + // return Origin(prev_id); + // } + + // StackTrace getStackTraceForDeviceUSM() const { + // return StackDepotGet(getDeviceUSMId()); + // } + + // StackTrace getStackTraceForHostUSM() const { + // return StackDepotGet(getHostUSMId()); + // } + + // StackTrace getStackTraceForSharedUSM() const { + // return StackDepotGet(getSharedUSMId()); + // } + + // StackTrace getStackTraceForLocal() const { + // return StackDepotGet(getLocalId()); + // } + + // static Origin CreateStackOrigin(uint32_t id) { + // assert((id & kStackIdMask) == id); + // return Origin((1 << kHeapShift) | id); + // } + + StackTrace getHeapStackTrace() const { + assert(isHeapOrigin()); + uint32_t StackId = getHeapId(); + return StackDepotGet(StackId); + } + + static Origin CreateHeapOrigin(StackTrace &Stack, HeapType Type) { + uint32_t StackId = StackDepotPut(Stack); + assert(StackId); + switch (Type) { + case HeapType::DeviceUSM: + assert((StackId & kDeviceUSMIdMask) == StackId); + StackId = (kDeviceUSMBits << kDeviceUSMShift) | StackId; + break; + case HeapType::HostUSM: + assert((StackId & kHostUSMIdMask) == StackId); + StackId = (kHostUSMBits << kHostUSMShift) | StackId; + break; + case HeapType::SharedUSM: + assert((StackId & kSharedUSMIdMask) == StackId); + StackId = (kSharedUSMBits << kSharedUSMShift) | StackId; + break; + case HeapType::Local: + assert((StackId & kLocalIdMask) == StackId); + StackId = (kLocalBits << kLocalShift) | StackId; + break; + default: + assert(false && "Unknown heap type"); + return Origin(0); // Should never reach here + } + return Origin(StackId); + } + + static Origin FromRawId(uint32_t id) { return Origin(id); } + +private: + static const int kDeviceUSMBits = 2; + static const int kDeviceUSMShift = 32 - 2; + + static const 
int kHostUSMBits = 6; + static const int kHostUSMShift = 32 - 3; + + static const int kSharedUSMBits = 14; + static const int kSharedUSMShift = 32 - 4; + + static const int kLocalBits = 15; + static const int kLocalShift = 32 - 4; + + static const int kDepthBits = 3; + static const int kDepthShift = 32 - kDepthBits - 1; + + static const uint32_t kDeviceUSMIdMask = ((uint32_t)-1) >> + (32 - kDeviceUSMShift); + static const uint32_t kHostUSMIdMask = ((uint32_t)-1) >> (32 - kHostUSMShift); + static const uint32_t kSharedUSMIdMask = ((uint32_t)-1) >> + (32 - kSharedUSMShift); + static const uint32_t kLocalIdMask = ((uint32_t)-1) >> (32 - kLocalShift); + static const uint32_t kChainedIdMask = ((uint32_t)-1) >> (32 - kDepthShift); + static const uint32_t kStackIdMask = ((uint32_t)-1) >> (32 - kDepthShift); + + uint32_t raw_id_; + + explicit Origin(uint32_t raw_id) : raw_id_(raw_id) {} + + int depth() const { + assert(isChainedOrigin()); + return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1); + } + +public: + static const int kMaxDepth = (1 << kDepthBits) - 1; +}; + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp index 19ceca7856a55..5be5aea4cb68e 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp @@ -13,6 +13,7 @@ #include "msan_report.hpp" #include "msan_libdevice.hpp" +#include "msan_origin.hpp" #include "sanitizer_common/sanitizer_common.hpp" #include "sanitizer_common/sanitizer_utils.hpp" @@ -30,15 +31,8 @@ void ReportUsesUninitializedValue(const MsanErrorReport &Report, // Try to demangle the kernel name KernelName = DemangleName(KernelName); - if (Report.Origin) { - UR_LOG_L( - getContext()->logger, QUIET, - "====WARNING: DeviceSanitizer: use-of-uninitialized-value (shadow: {})", - (void 
*)Report.Origin); - } else { - UR_LOG_L(getContext()->logger, QUIET, - "====WARNING: DeviceSanitizer: use-of-uninitialized-value"); - } + UR_LOG_L(getContext()->logger, QUIET, + "====WARNING: DeviceSanitizer: use-of-uninitialized-value"); UR_LOG_L(getContext()->logger, QUIET, "use of size {} at kernel <{}> LID({}, {}, {}) GID({}, " @@ -47,6 +41,19 @@ void ReportUsesUninitializedValue(const MsanErrorReport &Report, Report.LID2, Report.GID0, Report.GID1, Report.GID2); UR_LOG_L(getContext()->logger, QUIET, " #0 {} {}:{}", Func, File, Report.Line); + + if (!Report.Origin) { + return; + } + + Origin Origin = Origin::FromRawId(Report.Origin); + if (Origin.isHeapOrigin()) { + HeapType HeapType = Origin.getHeapType(); + StackTrace Stack = Origin.getHeapStackTrace(); + UR_LOG_L(getContext()->logger, QUIET, + "ORIGIN: {} allocation:", ToString(HeapType)); + Stack.print(); + } } } // namespace msan diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 18c934c17d637..7ead3cb8c12cb 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -110,10 +110,21 @@ uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) { return Ptr ^ CPU_SHADOW_MASK; } +uptr MsanShadowMemoryCPU::MemToOrigin(uptr Ptr) { + return Ptr ^ CPU_SHADOW_MASK; +} + ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t NumEvents, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { + return EnqueuePoisonShadowWithOrigin(Queue, Ptr, Size, Value, 0, NumEvents, + EventWaitList, OutEvent); +} +ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadowWithOrigin( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, + uint32_t NumEvents, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { if (Size) { const 
uptr ShadowBegin = MemToShadow(Ptr); const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); @@ -183,20 +194,14 @@ ur_result_t MsanShadowMemoryGPU::Destory() { return Result; } -ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, +ur_result_t MsanShadowMemoryGPU::EnqueueVirtualMemMap( + ur_queue_handle_t Queue, uptr VirtualBegin, uptr VirtualEnd, std::vector &EventWaitList, ur_event_handle_t *OutEvent) { - const size_t PageSize = GetVirtualMemGranularity(Context, Device); - - const uptr ShadowBegin = MemToShadow(Ptr); - const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); - assert(ShadowBegin <= ShadowEnd); - // Make sure [Ptr, Ptr + Size] is mapped to physical memory - for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize); - MappedPtr <= ShadowEnd; MappedPtr += PageSize) { + for (auto MappedPtr = RoundDownTo(VirtualBegin, PageSize); + MappedPtr <= VirtualEnd; MappedPtr += PageSize) { std::scoped_lock Guard(VirtualMemMapsMutex); if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) { ur_physical_mem_handle_t PhysicalMem{}; @@ -236,10 +241,10 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( VirtualMemMaps[MappedPtr].first = PhysicalMem; } - auto AllocInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Ptr); - if (AllocInfoItOp) { - VirtualMemMaps[MappedPtr].second.insert((*AllocInfoItOp)->second); - } + // auto AllocInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Ptr); + // if (AllocInfoItOp) { + // VirtualMemMaps[MappedPtr].second.insert((*AllocInfoItOp)->second); + // } } return UR_RESULT_SUCCESS; @@ -248,6 +253,14 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t NumEvents, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { + return EnqueuePoisonShadowWithOrigin(Queue, Ptr, Size, Value, 0, NumEvents, + EventWaitList, OutEvent); +} + +ur_result_t 
MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, + uint32_t NumEvents, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { if (Size == 0) { if (OutEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( @@ -258,22 +271,43 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( std::vector Events(EventWaitList, EventWaitList + NumEvents); - UR_CALL(EnqueueMapShadow(Queue, Ptr, Size, Events, OutEvent)); + { + uptr ShadowBegin = MemToShadow(Ptr); + uptr ShadowEnd = MemToShadow(Ptr + Size - 1); + assert(ShadowBegin <= ShadowEnd); - const uptr ShadowBegin = MemToShadow(Ptr); - const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); - assert(ShadowBegin <= ShadowEnd); + UR_CALL( + EnqueueVirtualMemMap(Queue, ShadowBegin, ShadowEnd, Events, OutEvent)); - auto Result = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, - ShadowEnd - ShadowBegin + 1, - Events.size(), Events.data(), OutEvent); + UR_LOG_L(getContext()->logger, DEBUG, + "EnqueuePoisonShadow(addr={}, count={}, value={})", + (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, + (void *)(size_t)Value); - UR_LOG_L(getContext()->logger, DEBUG, - "EnqueuePoisonShadow(addr={}, count={}, value={}): {}", - (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(size_t)Value, Result); + UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, + ShadowEnd - ShadowBegin + 1, Events.size(), + Events.data(), OutEvent)); + } - return Result; + { + uptr OriginBegin = MemToOrigin(Ptr); + uptr OriginEnd = MemToOrigin(Ptr + Size - 1); + UR_CALL( + EnqueueVirtualMemMap(Queue, OriginBegin, OriginEnd, Events, OutEvent)); + + if (Origin != 0) { + UR_LOG_L(getContext()->logger, DEBUG, + "EnqueuePoisonOrigin(addr={}, count={}, value={})", + (void *)OriginBegin, OriginEnd - OriginBegin + 1, + (void *)(uptr)Origin); + + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( + Queue, (void *)OriginBegin, sizeof(Origin), 
&Origin, Size, NumEvents, + EventWaitList, OutEvent)); + } + } + + return UR_RESULT_SUCCESS; } ur_result_t @@ -392,7 +426,7 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, } uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { - if (MsanShadowMemoryPVC::IsDeviceUSM(Ptr)) { + if (MsanShadowMemoryPVC::isDeviceUSM(Ptr)) { return Ptr - 0x5000'0000'0000ULL; } // host/shared USM @@ -400,8 +434,26 @@ uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { ShadowBegin; } +uptr MsanShadowMemoryPVC::MemToOrigin(uptr Ptr) { + if (MsanShadowMemoryPVC::isDeviceUSM(Ptr)) { + return Ptr - 0xA000'0000'0000ULL; + } + // host/shared USM + return (Ptr & 0xff'ffff'ffffULL) + ((Ptr & 0x8000'0000'0000ULL) >> 7) + + ShadowBegin + 0x0200'0000'0000ULL; +} + uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) { - assert(MsanShadowMemoryDG2::IsDeviceUSM(Ptr) && "Ptr must be device USM"); + assert(MsanShadowMemoryDG2::isDeviceUSM(Ptr) && "Ptr must be device USM"); + if (Ptr < ShadowBegin) { + return Ptr + (ShadowBegin - 0xffff'8000'0000'0000ULL); + } else { + return Ptr - (0xffff'ffff'ffff'ffffULL - ShadowEnd + 1); + } +} + +uptr MsanShadowMemoryDG2::MemToOrigin(uptr Ptr) { + assert(MsanShadowMemoryDG2::isDeviceUSM(Ptr) && "Ptr must be device USM"); if (Ptr < ShadowBegin) { return Ptr + (ShadowBegin - 0xffff'8000'0000'0000ULL); } else { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp index 701c3a878f725..0e7318b086561 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -32,6 +32,7 @@ struct MsanShadowMemory { virtual ur_result_t Destory() = 0; virtual uptr MemToShadow(uptr Ptr) = 0; + virtual uptr MemToOrigin(uptr Ptr) = 0; virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, @@ -39,6 +40,11 @@ struct MsanShadowMemory { const 
ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) = 0; + virtual ur_result_t EnqueuePoisonShadowWithOrigin( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, + uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) = 0; + virtual ur_result_t ReleaseShadow(std::shared_ptr) { return UR_RESULT_SUCCESS; } @@ -87,6 +93,7 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory { ur_result_t Destory() override; uptr MemToShadow(uptr Ptr) override; + uptr MemToOrigin(uptr Ptr) override; ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, @@ -94,6 +101,11 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory { const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override; + ur_result_t EnqueuePoisonShadowWithOrigin( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, + uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) override; + ur_result_t AllocLocalShadow(ur_queue_handle_t, uint32_t, uptr &Begin, uptr &End) override { Begin = ShadowBegin; @@ -125,6 +137,11 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override final; + ur_result_t EnqueuePoisonShadowWithOrigin( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, + uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) override; + ur_result_t ReleaseShadow(std::shared_ptr AI) override final; ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, @@ -139,9 +156,11 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { virtual uptr GetStartAddress() { return 0; } private: - ur_result_t EnqueueMapShadow(ur_queue_handle_t Queue, uptr Ptr, 
uptr Size, - std::vector &EventWaitList, - ur_event_handle_t *OutEvent); + ur_result_t + EnqueueVirtualMemMap(ur_queue_handle_t Queue, uptr VirtualBegin, + uptr VirtualEnd, + std::vector &EventWaitList, + ur_event_handle_t *OutEvent); std::unordered_map< uptr, std::pair> 52 == 0xff0; } + static bool isDeviceUSM(uptr Ptr) { return Ptr >> 52 == 0xff0; } uptr MemToShadow(uptr Ptr) override; + uptr MemToOrigin(uptr Ptr) override; size_t GetShadowSize() override { return 0xb400'0000'0000ULL; } @@ -201,9 +221,10 @@ struct MsanShadowMemoryDG2 final : public MsanShadowMemoryGPU { MsanShadowMemoryDG2(ur_context_handle_t Context, ur_device_handle_t Device) : MsanShadowMemoryGPU(Context, Device) {} - static bool IsDeviceUSM(uptr Ptr) { return Ptr >> 48; } + static bool isDeviceUSM(uptr Ptr) { return Ptr >> 48; } uptr MemToShadow(uptr Ptr) override; + uptr MemToOrigin(uptr Ptr) override; size_t GetShadowSize() override { return 0x4000'0000'0000ULL; } }; diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp new file mode 100644 index 0000000000000..2ecadbfca3554 --- /dev/null +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp @@ -0,0 +1,49 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file sanitizer_stackdepot.cpp + * + */ + +#pragma once + +#include "sanitizer_stackdepot.hpp" +#include +#include + +namespace ur_sanitizer_layer { + +class StackDepot { +public: + uint32_t Put(StackTrace Stack) { + uint32_t Id = _NextId.fetch_add(1); + _Depot[Id] = Stack; + return Id; + } + + StackTrace Get(uint32_t Id) { + auto It = _Depot.find(Id); + if (It != _Depot.end()) { + return It->second; + } + return StackTrace(); + } + +private: + std::atomic_uint32_t _NextId{1}; + std::unordered_map _Depot; +}; + +static StackDepot theDepot; + +uint32_t StackDepotPut(StackTrace &Stack) { return theDepot.Put(Stack); } + +StackTrace StackDepotGet(uint32_t Id) { return theDepot.Get(Id); } + +} // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp new file mode 100644 index 0000000000000..4e633d0c71d02 --- /dev/null +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp @@ -0,0 +1,23 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file sanitizer_stackdepot.hpp + * + */ + +#pragma once + +#include "sanitizer_stacktrace.hpp" + +namespace ur_sanitizer_layer { + +uint32_t StackDepotPut(StackTrace &Stack); +StackTrace StackDepotGet(uint32_t Id); + +} // namespace ur_sanitizer_layer From 28d287a8ba961bb183dd066e5d9f81f14d567f9e Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Tue, 3 Jun 2025 08:02:21 +0200 Subject: [PATCH 04/26] wip --- libdevice/sanitizer/msan_rtl.cpp | 72 ++++++++++++---- .../sanitizer/msan/msan_interceptor.cpp | 30 +++---- .../layers/sanitizer/msan/msan_origin.hpp | 61 ++----------- .../layers/sanitizer/msan/msan_report.cpp | 4 +- .../sanitizer_common/sanitizer_allocator.cpp | 86 ++++++++++--------- .../sanitizer_common/sanitizer_allocator.hpp | 16 ++-- .../sanitizer_common/sanitizer_stackdepot.cpp | 53 ++++++++++-- .../sanitizer_common/sanitizer_stackdepot.hpp | 21 ++++- 8 files changed, 193 insertions(+), 150 deletions(-) diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index db220f7177ee3..5ec674669425d 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -169,14 +169,14 @@ inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) { } if (as == ADDRESS_SPACE_GLOBAL) { - if (addr >> 52 == 0xff0) { // device USM + // device USM + if (addr >> 52 == 0xff0) { return addr - 0x5000'0000'0000ULL; } // host/shared USM auto shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + shadow_base; - return GetMsanLaunchInfo->CleanShadow; } else if (as == ADDRESS_SPACE_LOCAL) { const auto shadow_offset = GetMsanLaunchInfo->LocalShadowOffset; if (shadow_offset != 0) { @@ -215,6 +215,32 @@ inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) { return GetMsanLaunchInfo->CleanShadow; } +inline uptr __msan_get_origin_cpu(uptr addr) { + return addr ^ 
0x500000000000ULL; +} + +inline uptr __msan_get_origin_dg2(uptr addr, uint32_t as) { return 0; } + +inline uptr __msan_get_origin_pvc(uptr addr, uint32_t as) { + if (as == ADDRESS_SPACE_GENERIC) { + ConvertGenericPointer(addr, as); + } + + if (as == ADDRESS_SPACE_GLOBAL) { + // device USM + if (addr >> 52 == 0xff0) { + return addr - 0xa000'0000'0000ULL; + } + // host/shared USM + uptr shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; + return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + + shadow_base + 0x0200'0000'0000ULL; + } + + // Return clean shadow (0s) by default + return GetMsanLaunchInfo->CleanShadow; +} + inline void __msan_exit() { if (!GetMsanLaunchInfo->IsRecover) __devicelib_exit(); @@ -262,11 +288,11 @@ __msan_warning_noreturn(const char __SYCL_CONSTANT__ *file, uint32_t line, DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void * __msan_get_shadow(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *func = nullptr) { - // Return clean shadow (0s) by default - uptr shadow_ptr = GetMsanLaunchInfo->CleanShadow; - if (!GetMsanLaunchInfo) - return (__SYCL_GLOBAL__ void *)shadow_ptr; + return nullptr; + + // Return clean shadow (0s) by default + uptr shadow_ptr; #if defined(__LIBDEVICE_PVC__) shadow_ptr = __msan_get_shadow_pvc(addr, as); @@ -280,6 +306,7 @@ __msan_get_shadow(uptr addr, uint32_t as, } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { shadow_ptr = __msan_get_shadow_dg2(addr, as); } else { + shadow_ptr = GetMsanLaunchInfo->CleanShadow; MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, GetMsanLaunchInfo->DeviceTy)); } @@ -296,22 +323,29 @@ __msan_get_shadow(uptr addr, uint32_t as, // "unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp" DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, uint32_t as) { - // Return clean shadow (0s) by default - uptr origin_ptr = GetMsanLaunchInfo->CleanShadow; - if (!GetMsanLaunchInfo) - return (__SYCL_GLOBAL__ void 
*)origin_ptr; + return nullptr; - if (as == ADDRESS_SPACE_GLOBAL) { - if (addr >> 52 == 0xff0) { // device USM - return (__SYCL_GLOBAL__ void *)(addr - 0xa000'0000'0000ULL); - } - // host/shared USM - uptr shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; - return (__SYCL_GLOBAL__ void *)((addr & 0xff'ffff'ffffULL) + - ((addr & 0x8000'0000'0000ULL) >> 7) + - shadow_base + 0x0200'0000'0000ULL); + // Return clean shadow (0s) by default + uptr origin_ptr; + +#if defined(__LIBDEVICE_PVC__) + origin_ptr = __msan_get_origin_pvc(addr, as); +#elif defined(__LIBDEVICE_CPU__) + origin_ptr = __msan_get_origin_cpu(addr); +#else + if (LIKELY(GetMsanLaunchInfo->DeviceTy == DeviceType::CPU)) { + origin_ptr = __msan_get_origin_cpu(addr); + } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_PVC) { + origin_ptr = __msan_get_origin_pvc(addr, as); + } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { + origin_ptr = __msan_get_origin_dg2(addr, as); + } else { + origin_ptr = GetMsanLaunchInfo->CleanShadow; + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, + GetMsanLaunchInfo->DeviceTy)); } +#endif return (__SYCL_GLOBAL__ void *)origin_ptr; } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index b5e5286028d82..26bd117983dc2 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -65,25 +65,20 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, if (Alignment < MSAN_ORIGIN_TRACKING_GRANULARITY) { Alignment = MSAN_ORIGIN_TRACKING_GRANULARITY; } - uptr RoundedSize = RoundUpTo(Size, Alignment); - void *Allocated = nullptr; - - if (Type == AllocType::DEVICE_USM) { - UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( - Context, Device, Properties, Pool, RoundedSize, &Allocated)); - } else if (Type == 
AllocType::HOST_USM) { - UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc(Context, Properties, Pool, - RoundedSize, &Allocated)); - } else if (Type == AllocType::SHARED_USM) { - UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, Properties, Pool, RoundedSize, &Allocated)); + ur_usm_desc_t NewProperties; + if (Properties) { + NewProperties = *Properties; + NewProperties.align = Alignment; } else { - UR_LOG_L(getContext()->logger, ERR, "Unsupported allocation type: {}", - ToString(Type)); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + NewProperties = {UR_STRUCTURE_TYPE_USM_DESC, nullptr, + UR_USM_ADVICE_FLAG_DEFAULT, Alignment}; } + void *Allocated = nullptr; + UR_CALL( + SafeAllocate(Context, Device, Size, Properties, Pool, Type, &Allocated)); + *ResultPtr = Allocated; if (Type != AllocType::DEVICE_USM) { @@ -107,6 +102,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, m_AllocationMap.emplace(AI->AllocBegin, AI); } + // For origin tracking HeapType HeapType; switch (Type) { case AllocType::DEVICE_USM: @@ -119,7 +115,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, HeapType = HeapType::SharedUSM; break; default: - assert(false); + assert(false && "Unknown heap type"); } Origin HeapOrigin = Origin::CreateHeapOrigin(Stack, HeapType); @@ -127,7 +123,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, // Update shadow memory ManagedQueue Queue(Context, Device); DeviceInfo->Shadow->EnqueuePoisonShadowWithOrigin( - Queue, AI->AllocBegin, AI->AllocSize, 0xff, HeapOrigin.raw_id()); + Queue, AI->AllocBegin, AI->AllocSize, 0xff, HeapOrigin.rawId()); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp index 2420cbff6ade7..1d5593200f624 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp +++ 
b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp @@ -31,29 +31,10 @@ namespace msan { // 0000 xxxx xxxx xxxx private memory // 0zzz xxxx xxxx xxxx chained // -enum class HeapType { DeviceUSM, HostUSM, SharedUSM, Local }; - -inline const char *ToString(HeapType Type) { - switch (Type) { - case HeapType::DeviceUSM: - return "Device USM"; - case HeapType::HostUSM: - return "Host USM"; - case HeapType::SharedUSM: - return "Shared USM"; - case HeapType::Local: - return "Local Memory"; - default: - return "Unknown Heap Type"; - } -} class Origin { public: - // static bool isValidId(uint32_t id) { return id != 0 && id != - // (uint32_t)-1; } - - uint32_t raw_id() const { return raw_id_; } + uint32_t rawId() const { return raw_id_; } bool isHeapOrigin() const { return isDeviceUSMOrigin() || isHostUSMOrigin() || isSharedUSMOrigin() || @@ -148,46 +129,15 @@ class Origin { return raw_id_ & kChainedIdMask; } - // Returns the next origin in the chain and the current stack trace. 
- // Origin getNextChainedOrigin(StackTrace *stack) const { - // assert(isChainedOrigin()); - // uint32_t prev_id; - // uint32_t StackId = ChainedOriginDepotGet(getChainedId(), &prev_id); - // if (stack) - // *stack = StackDepotGet(StackId); - // return Origin(prev_id); - // } - - // StackTrace getStackTraceForDeviceUSM() const { - // return StackDepotGet(getDeviceUSMId()); - // } - - // StackTrace getStackTraceForHostUSM() const { - // return StackDepotGet(getHostUSMId()); - // } - - // StackTrace getStackTraceForSharedUSM() const { - // return StackDepotGet(getSharedUSMId()); - // } - - // StackTrace getStackTraceForLocal() const { - // return StackDepotGet(getLocalId()); - // } - - // static Origin CreateStackOrigin(uint32_t id) { - // assert((id & kStackIdMask) == id); - // return Origin((1 << kHeapShift) | id); - // } - StackTrace getHeapStackTrace() const { assert(isHeapOrigin()); - uint32_t StackId = getHeapId(); - return StackDepotGet(StackId); + return StackDepotGet(getHeapId(), getHeapType()); } static Origin CreateHeapOrigin(StackTrace &Stack, HeapType Type) { - uint32_t StackId = StackDepotPut(Stack); + uint32_t StackId = StackDepotPut(Stack, Type); assert(StackId); + switch (Type) { case HeapType::DeviceUSM: assert((StackId & kDeviceUSMIdMask) == StackId); @@ -207,8 +157,9 @@ class Origin { break; default: assert(false && "Unknown heap type"); - return Origin(0); // Should never reach here + StackId = 0; } + return Origin(StackId); } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp index 5be5aea4cb68e..ef2481659aac7 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp @@ -48,10 +48,10 @@ void ReportUsesUninitializedValue(const MsanErrorReport &Report, Origin Origin = Origin::FromRawId(Report.Origin); if (Origin.isHeapOrigin()) { - HeapType HeapType = 
Origin.getHeapType(); + HeapType Type = Origin.getHeapType(); StackTrace Stack = Origin.getHeapStackTrace(); UR_LOG_L(getContext()->logger, QUIET, - "ORIGIN: {} allocation:", ToString(HeapType)); + "ORIGIN: {} allocation:", ToString(Type)); Stack.print(); } } diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp index 7ff08a8f53e3c..c476f91960323 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp @@ -19,51 +19,59 @@ namespace ur_sanitizer_layer { namespace { -void validate(uptr Allocated, AllocType AllocType, DeviceType DeviceType) { - if (DeviceType == DeviceType::GPU_PVC) { - switch (AllocType) { - case AllocType::DEVICE_USM: - assert((Allocated >> 52) == 0xff0); - break; - case AllocType::HOST_USM: - assert((Allocated >> 40) == 0xffff); - break; - case AllocType::SHARED_USM: - assert((Allocated >> 40) == 0x7f); - break; - default: - return; - } +void validateDeviceUSM(uptr Allocated, DeviceType DeviceType) { + switch (DeviceType) { + case DeviceType::GPU_PVC: { + assert((Allocated >> 52) == 0xff0); + break; + default: + break; + } } } -} // namespace -void *Allocator::allocate(uptr Size, const ur_usm_desc_t *Properties, - AllocType Type) { - void *Allocated = nullptr; - ur_result_t Result; - ur_usm_pool_handle_t Pool = nullptr; - - if (Type == AllocType::DEVICE_USM) { - Result = getContext()->urDdiTable.USM.pfnDeviceAlloc( - Context, Device, Properties, Pool, Size, &Allocated); - } else if (Type == AllocType::HOST_USM) { - Result = getContext()->urDdiTable.USM.pfnHostAlloc(Context, Properties, - Pool, Size, &Allocated); - } else if (Type == AllocType::SHARED_USM) { - Result = getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, Properties, Pool, Size, &Allocated); - } else { 
- return nullptr; +void validateSharedUSM(uptr Allocated, DeviceType DeviceType) { + switch (DeviceType) { + case DeviceType::GPU_PVC: { + assert((Allocated >> 40) == 0x7f); + break; + default: + break; } - - if (Result != UR_RESULT_SUCCESS) { - return nullptr; } +} +} // namespace - validate((uptr)Allocated, Type, GetDeviceType(Context, Device)); - - return Allocated; +ur_result_t SafeAllocate(ur_context_handle_t Context, ur_device_handle_t Device, + uptr Size, const ur_usm_desc_t *Properties, + ur_usm_pool_handle_t Pool, AllocType Type, + void **Allocated) { + DeviceType DevieType = + Device ? GetDeviceType(Context, Device) : DeviceType::UNKNOWN; + switch (Type) { + case AllocType::DEVICE_USM: + case AllocType::MEM_BUFFER: + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, Properties, Pool, Size, Allocated)); + validateDeviceUSM((uptr)*Allocated, DevieType); + break; + case AllocType::HOST_USM: + UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc(Context, Properties, Pool, + Size, Allocated)); + // FIXME: it's hard to validate host USM pointer because we don't have + // device information here + break; + case AllocType::SHARED_USM: + UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( + Context, Device, Properties, Pool, Size, Allocated)); + validateSharedUSM((uptr)*Allocated, DevieType); + break; + default: + UR_LOG_L(getContext()->logger, ERR, "Unsupport memory type: {}", + ToString(Type)); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + return UR_RESULT_SUCCESS; } } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp index 09034788a1c36..722df27a9bc63 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp @@ -43,15 +43,11 @@ inline 
const char *ToString(AllocType Type) { } } -class Allocator { -public: - Allocator(); - - void *allocate(uptr Size, const ur_usm_desc_t *Properties, AllocType Type); - -private: - ur_context_handle_t Context; - ur_device_handle_t Device; -}; +// Allocating USM with validation, so that we can ensure the allocated addresses +// satisfy the assumption we made for shadow memory +ur_result_t SafeAllocate(ur_context_handle_t Context, ur_device_handle_t Device, + uptr Size, const ur_usm_desc_t *Properties, + ur_usm_pool_handle_t Pool, AllocType Type, + void **Allocated); } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp index 2ecadbfca3554..3e5bed857e476 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp @@ -11,9 +11,8 @@ * */ -#pragma once - #include "sanitizer_stackdepot.hpp" + #include #include @@ -40,10 +39,52 @@ class StackDepot { std::unordered_map _Depot; }; -static StackDepot theDepot; - -uint32_t StackDepotPut(StackTrace &Stack) { return theDepot.Put(Stack); } +uint32_t StackDepotPut(StackTrace &Stack, HeapType Type) { + switch (Type) { + case HeapType::DeviceUSM: { + static StackDepot TheDepot; + return TheDepot.Put(Stack); + } + case HeapType::HostUSM: { + static StackDepot TheDepot; + return TheDepot.Put(Stack); + } + case HeapType::SharedUSM: { + static StackDepot TheDepot; + return TheDepot.Put(Stack); + } + case HeapType::Local: { + static StackDepot TheDepot; + return TheDepot.Put(Stack); + } + default: + assert(false && "Unknown heap type"); + return 0; + } +} -StackTrace StackDepotGet(uint32_t Id) { return theDepot.Get(Id); } +StackTrace StackDepotGet(uint32_t Id, HeapType Type) { + switch (Type) { + case HeapType::DeviceUSM: { + static 
StackDepot TheDepot; + return TheDepot.Get(Id); + } + case HeapType::HostUSM: { + static StackDepot TheDepot; + return TheDepot.Get(Id); + } + case HeapType::SharedUSM: { + static StackDepot TheDepot; + return TheDepot.Get(Id); + } + case HeapType::Local: { + static StackDepot TheDepot; + return TheDepot.Get(Id); + } + default: + assert(false && "Unknown heap type"); + return StackTrace(); + } +} } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp index 4e633d0c71d02..4b452f737ccb1 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp @@ -17,7 +17,24 @@ namespace ur_sanitizer_layer { -uint32_t StackDepotPut(StackTrace &Stack); -StackTrace StackDepotGet(uint32_t Id); +enum class HeapType { DeviceUSM, HostUSM, SharedUSM, Local }; + +inline const char *ToString(HeapType Type) { + switch (Type) { + case HeapType::DeviceUSM: + return "Device USM"; + case HeapType::HostUSM: + return "Host USM"; + case HeapType::SharedUSM: + return "Shared USM"; + case HeapType::Local: + return "Local Memory"; + default: + return "Unknown Heap Type"; + } +} + +uint32_t StackDepotPut(StackTrace &Stack, HeapType Type); +StackTrace StackDepotGet(uint32_t Id, HeapType Type); } // namespace ur_sanitizer_layer From 0210b513b44820cda88b5ba37b3225e467683042 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 4 Jun 2025 09:54:26 +0200 Subject: [PATCH 05/26] wip --- .../Instrumentation/MemorySanitizer.cpp | 20 +++-- .../layers/sanitizer/msan/msan_buffer.cpp | 20 +---- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 82 ++++-------------- .../sanitizer/msan/msan_interceptor.cpp | 86 +++++-------------- .../sanitizer/msan/msan_interceptor.hpp | 39 +++------ 
.../layers/sanitizer/msan/msan_libdevice.hpp | 2 +- .../layers/sanitizer/msan/msan_origin.hpp | 8 +- .../layers/sanitizer/msan/msan_report.cpp | 4 +- .../sanitizer_common/sanitizer_stackdepot.cpp | 57 ++---------- .../sanitizer_common/sanitizer_stackdepot.hpp | 7 +- .../sanitizer_common/sanitizer_stacktrace.cpp | 1 + 11 files changed, 88 insertions(+), 238 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 68362a26ab166..8013aa27a4403 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1498,6 +1498,12 @@ void MemorySanitizer::createUserspaceApi(Module &M, MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false), IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty()); + + FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); + MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( + FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false), + IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), PtrTy, + IRB.getInt32Ty()); } else { // SPIR or SPIR-V // __msan_maybe_warning_N( // intN_t status, @@ -1511,13 +1517,13 @@ void MemorySanitizer::createUserspaceApi(Module &M, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS), IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS)); - } - FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); - MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( - FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false), - IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), PtrTy, - IRB.getInt32Ty()); + FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); + MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( + FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false), + 
IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IntptrTy, + IRB.getInt32Ty()); + } } MsanSetAllocaOriginWithDescriptionFn = @@ -2112,6 +2118,8 @@ struct MemorySanitizerVisitor : public InstVisitor { FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex]; Value *ConvertedShadow2 = IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); + if (SpirOrSpirv) + Addr = IRB.CreatePtrToInt(Addr, IRB.getIntPtrTy(DL)); CallBase *CB = IRB.CreateCall(Fn, {ConvertedShadow2, Addr, Origin}); CB->addParamAttr(0, Attribute::ZExt); CB->addParamAttr(2, Attribute::ZExt); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp index 2d036da1332a1..57d9f3e0e68be 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp @@ -49,11 +49,6 @@ ur_result_t EnqueueMemCopyRectHelper( char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y + DstSlicePitch * DstOffset.z; - const bool IsDstDeviceUSM = - getMsanInterceptor()->findAllocInfoByAddress((uptr)DstOrigin).has_value(); - const bool IsSrcDeviceUSM = - getMsanInterceptor()->findAllocInfoByAddress((uptr)SrcOrigin).has_value(); - ur_device_handle_t Device = GetDevice(Queue); std::shared_ptr DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); @@ -70,7 +65,7 @@ ur_result_t EnqueueMemCopyRectHelper( Events.push_back(NewEvent); // Update shadow memory - if (IsDstDeviceUSM && IsSrcDeviceUSM) { + { NewEvent = nullptr; uptr DstShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)DstOrigin + (i * DstSlicePitch)); @@ -81,19 +76,6 @@ ur_result_t EnqueueMemCopyRectHelper( (void *)SrcShadowAddr, SrcRowPitch, Region.width, Region.height, NumEventsInWaitList, EventWaitList, &NewEvent)); Events.push_back(NewEvent); - } else if (IsDstDeviceUSM && !IsSrcDeviceUSM) { - uptr DstShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)DstOrigin + - (i * 
DstSlicePitch)); - const char Val = 0; - // opencl & l0 adapter doesn't implement urEnqueueUSMFill2D, so - // emulate the operation with urEnqueueUSMFill. - for (size_t HeightIndex = 0; HeightIndex < Region.height; HeightIndex++) { - NewEvent = nullptr; - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( - Queue, (void *)(DstShadowAddr + HeightIndex * DstRowPitch), 1, &Val, - Region.width, NumEventsInWaitList, EventWaitList, &NewEvent)); - Events.push_back(NewEvent); - } } } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 13aa868cbf0f0..4c72279e28bab 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1473,14 +1473,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill( numEventsInWaitList, phEventWaitList, &Event)); Events.push_back(Event); - const auto Mem = (uptr)pMem; - auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); - if (MemInfoItOp) { - auto MemInfo = (*MemInfoItOp)->second; - - const auto &DeviceInfo = - getMsanInterceptor()->getDeviceInfo(MemInfo->Device); - const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); Event = nullptr; UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 0, @@ -1532,34 +1528,16 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( phEventWaitList, &Event)); Events.push_back(Event); - const auto Src = (uptr)pSrc, Dst = (uptr)pDst; - auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); - auto DstInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Dst); - - if (SrcInfoItOp && DstInfoItOp) { - auto SrcInfo = (*SrcInfoItOp)->second; - auto DstInfo = (*DstInfoItOp)->second; - - const auto 
&DeviceInfo = - getMsanInterceptor()->getDeviceInfo(SrcInfo->Device); - const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); - const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); + const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); Event = nullptr; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, (void *)SrcShadow, size, 0, nullptr, &Event)); Events.push_back(Event); - } else if (DstInfoItOp) { - auto DstInfo = (*DstInfoItOp)->second; - - const auto &DeviceInfo = - getMsanInterceptor()->getDeviceInfo(DstInfo->Device); - auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - - Event = nullptr; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 0, - nullptr, &Event)); - Events.push_back(Event); } if (phEvent) { @@ -1612,14 +1590,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( height, numEventsInWaitList, phEventWaitList, &Event)); Events.push_back(Event); - const auto Mem = (uptr)pMem; - auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); - if (MemInfoItOp) { - auto MemInfo = (*MemInfoItOp)->second; - - const auto &DeviceInfo = - getMsanInterceptor()->getDeviceInfo(MemInfo->Device); - const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); const char Pattern = 0; Event = nullptr; @@ -1680,37 +1654,17 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( &Event)); Events.push_back(Event); - const auto Src = (uptr)pSrc, Dst = (uptr)pDst; - auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); - auto DstInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Dst); - - if (SrcInfoItOp && 
DstInfoItOp) { - auto SrcInfo = (*SrcInfoItOp)->second; - auto DstInfo = (*DstInfoItOp)->second; - - const auto &DeviceInfo = - getMsanInterceptor()->getDeviceInfo(SrcInfo->Device); - const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); - const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); + const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); Event = nullptr; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, (void *)SrcShadow, srcPitch, width, height, 0, nullptr, &Event)); Events.push_back(Event); - } else if (DstInfoItOp) { - auto DstInfo = (*DstInfoItOp)->second; - - const auto &DeviceInfo = - getMsanInterceptor()->getDeviceInfo(DstInfo->Device); - const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - - const char Pattern = 0; - Event = nullptr; - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, - nullptr, &Event)); - Events.push_back(Event); } if (phEvent) { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 39b5afbf250b3..ba6efed967802 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -34,7 +34,6 @@ MsanInterceptor::~MsanInterceptor() { } m_MemBufferMap.clear(); - m_AllocationMap.clear(); m_KernelMap.clear(); m_ContextMap.clear(); @@ -51,8 +50,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, void **ResultPtr) { auto ContextInfo = getContextInfo(Context); - std::shared_ptr DeviceInfo = - Device ? getDeviceInfo(Device) : nullptr; + std::shared_ptr DI = Device ? 
getDeviceInfo(Device) : nullptr; // Origin tracking needs alignment at leat is 4 constexpr uint32_t MSAN_ORIGIN_TRACKING_GRANULARITY = 4; @@ -81,26 +79,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, *ResultPtr = Allocated; - if (Type != AllocType::DEVICE_USM) { - ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, Size); - } - - // For host/shared usm, we only record the alloc size. - if (Type != AllocType::DEVICE_USM) { - return UR_RESULT_SUCCESS; - } - assert(Device); - - StackTrace Stack = GetCurrentBacktrace(); - auto AI = std::make_shared( - MsanAllocInfo{(uptr)Allocated, Size, false, Context, Device, Stack, {}}); - AI->print(); - - // For memory release - { - std::scoped_lock Guard(m_AllocationMapMutex); - m_AllocationMap.emplace(AI->AllocBegin, AI); - } + ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, Size); // For origin tracking HeapType HeapType; @@ -118,26 +97,33 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, assert(false && "Unknown heap type"); } + StackTrace Stack = GetCurrentBacktrace(); Origin HeapOrigin = Origin::CreateHeapOrigin(Stack, HeapType); // Update shadow memory - ManagedQueue Queue(Context, Device); - DeviceInfo->Shadow->EnqueuePoisonShadowWithOrigin( - Queue, AI->AllocBegin, AI->AllocSize, 0xff, HeapOrigin.rawId()); + auto EnqueuePoison = [&](ur_device_handle_t Device) { + ManagedQueue Queue(Context, Device); + std::shared_ptr DI = getDeviceInfo(Device); + DI->Shadow->EnqueuePoisonShadowWithOrigin(Queue, (uptr)Allocated, Size, + 0xff, HeapOrigin.rawId()); + }; + if (Device) { // shared/device USM + EnqueuePoison(Device); + } else { // host USM + for (const auto &[Device, _] : m_DeviceMap) { + EnqueuePoison(Device); + } + } + + UR_LOG_L(getContext()->logger, INFO, + "AllocInfo {} (Size={}, Type={}, Origin={})", (void *)Allocated, + Size, ToString(Type), (void *)(uptr)HeapOrigin.rawId()); return UR_RESULT_SUCCESS; } ur_result_t 
MsanInterceptor::releaseMemory(ur_context_handle_t Context, void *Ptr) { - auto Addr = reinterpret_cast(Ptr); - auto AddrInfoItOp = findAllocInfoByAddress(Addr); - - if (AddrInfoItOp) { - std::scoped_lock Guard(m_AllocationMapMutex); - m_AllocationMap.erase(*AddrInfoItOp); - } - return getContext()->urDdiTable.USM.pfnFree(Context, Ptr); } @@ -624,38 +610,6 @@ ur_result_t MsanInterceptor::prepareLaunch( return UR_RESULT_SUCCESS; } -std::optional -MsanInterceptor::findAllocInfoByAddress(uptr Address) { - std::shared_lock Guard(m_AllocationMapMutex); - auto It = m_AllocationMap.upper_bound(Address); - if (It == m_AllocationMap.begin()) { - return std::nullopt; - } - --It; - - // Since we haven't intercepted all USM APIs, we can't make sure the found - // AllocInfo is correct. - if (Address < It->second->AllocBegin || - Address >= It->second->AllocBegin + It->second->AllocSize) { - return std::nullopt; - } - - return It; -} - -std::vector -MsanInterceptor::findAllocInfoByContext(ur_context_handle_t Context) { - std::shared_lock Guard(m_AllocationMapMutex); - std::vector AllocInfos; - for (auto It = m_AllocationMap.begin(); It != m_AllocationMap.end(); It++) { - const auto &[_, AI] = *It; - if (AI->Context == Context) { - AllocInfos.emplace_back(It); - } - } - return AllocInfos; -} - ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) { Shadow = GetMsanShadowMemory(Context, Handle, Type); assert(Shadow && "Failed to get shadow memory"); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index b7c5d274ae76a..04627de11d1ea 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -272,17 +272,17 @@ class MsanInterceptor { ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - msan::USMLaunchInfo &LaunchInfo); + 
USMLaunchInfo &LaunchInfo); ur_result_t postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - msan::USMLaunchInfo &LaunchInfo); + USMLaunchInfo &LaunchInfo); ur_result_t insertContext(ur_context_handle_t Context, - std::shared_ptr &CI); + std::shared_ptr &CI); ur_result_t eraseContext(ur_context_handle_t Context); ur_result_t insertDevice(ur_device_handle_t Device, - std::shared_ptr &CI); + std::shared_ptr &CI); ur_result_t eraseDevice(ur_device_handle_t Device); ur_result_t insertProgram(ur_program_handle_t Program); @@ -302,26 +302,19 @@ class MsanInterceptor { return UR_RESULT_SUCCESS; } - std::optional findAllocInfoByAddress(uptr Address); - - std::vector - findAllocInfoByContext(ur_context_handle_t Context); - - std::shared_ptr - getContextInfo(ur_context_handle_t Context) { + std::shared_ptr getContextInfo(ur_context_handle_t Context) { std::shared_lock Guard(m_ContextMapMutex); assert(m_ContextMap.find(Context) != m_ContextMap.end()); return m_ContextMap[Context]; } - std::shared_ptr getDeviceInfo(ur_device_handle_t Device) { + std::shared_ptr getDeviceInfo(ur_device_handle_t Device) { std::shared_lock Guard(m_DeviceMapMutex); assert(m_DeviceMap.find(Device) != m_DeviceMap.end()); return m_DeviceMap[Device]; } - std::shared_ptr - getProgramInfo(ur_program_handle_t Program) { + std::shared_ptr getProgramInfo(ur_program_handle_t Program) { std::shared_lock Guard(m_ProgramMapMutex); assert(m_ProgramMap.find(Program) != m_ProgramMap.end()); return m_ProgramMap[Program]; @@ -339,29 +332,29 @@ class MsanInterceptor { private: /// Initialize Global Variables & Kernel Name at first Launch - ur_result_t prepareLaunch(std::shared_ptr &DeviceInfo, + ur_result_t prepareLaunch(std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, - msan::USMLaunchInfo &LaunchInfo); + USMLaunchInfo &LaunchInfo); ur_result_t allocShadowMemory(ur_context_handle_t Context, - std::shared_ptr &DeviceInfo); + std::shared_ptr &DeviceInfo); ur_result_t 
registerSpirKernels(ur_program_handle_t Program); ur_result_t registerDeviceGlobals(ur_program_handle_t Program); private: - std::unordered_map> + std::unordered_map> m_ContextMap; ur_shared_mutex m_ContextMapMutex; - std::unordered_map> + std::unordered_map> m_DeviceMap; ur_shared_mutex m_DeviceMapMutex; - std::unordered_map> + std::unordered_map> m_ProgramMap; ur_shared_mutex m_ProgramMapMutex; - std::unordered_map> + std::unordered_map> m_KernelMap; ur_shared_mutex m_KernelMapMutex; @@ -369,10 +362,6 @@ class MsanInterceptor { m_MemBufferMap; ur_shared_mutex m_MemBufferMapMutex; - /// Assumption: all USM chunks are allocated in one VA - MsanAllocationMap m_AllocationMap; - ur_shared_mutex m_AllocationMapMutex; - std::unordered_set m_Adapters; ur_shared_mutex m_AdaptersMutex; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index ccd5b7715591d..1f46f9c3e4769 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -37,7 +37,7 @@ struct MsanErrorReport { uint32_t AccessSize = 0; ErrorType ErrorTy = ErrorType::UNKNOWN; - uintptr_t Origin; + uint32_t Origin; }; struct MsanLocalArgsInfo { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp index 1d5593200f624..6a3833583abe6 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp @@ -131,12 +131,12 @@ class Origin { StackTrace getHeapStackTrace() const { assert(isHeapOrigin()); - return StackDepotGet(getHeapId(), getHeapType()); + return StackDepotGet(raw_id_); } static Origin CreateHeapOrigin(StackTrace &Stack, HeapType Type) { - uint32_t StackId = StackDepotPut(Stack, Type); - assert(StackId); + static 
std::array _NextIds; + uint32_t StackId = _NextIds[(uint32_t)Type].fetch_add(1); switch (Type) { case HeapType::DeviceUSM: @@ -160,6 +160,8 @@ class Origin { StackId = 0; } + StackDepotPut(StackId, Stack); + return Origin(StackId); } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp index ef2481659aac7..a61be3829973f 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp @@ -50,8 +50,8 @@ void ReportUsesUninitializedValue(const MsanErrorReport &Report, if (Origin.isHeapOrigin()) { HeapType Type = Origin.getHeapType(); StackTrace Stack = Origin.getHeapStackTrace(); - UR_LOG_L(getContext()->logger, QUIET, - "ORIGIN: {} allocation:", ToString(Type)); + UR_LOG_L(getContext()->logger, QUIET, "ORIGIN: {} allocation ({})", + ToString(Type), (void *)(uptr)Report.Origin); Stack.print(); } } diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp index 3e5bed857e476..361b08462e579 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp @@ -20,13 +20,14 @@ namespace ur_sanitizer_layer { class StackDepot { public: - uint32_t Put(StackTrace Stack) { - uint32_t Id = _NextId.fetch_add(1); + uint32_t Put(uint32_t Id, StackTrace Stack) { + std::scoped_lock Guard(_Mutex); _Depot[Id] = Stack; return Id; } StackTrace Get(uint32_t Id) { + std::shared_lock Guard(_Mutex); auto It = _Depot.find(Id); if (It != _Depot.end()) { return It->second; @@ -35,56 +36,14 @@ class StackDepot { } private: - std::atomic_uint32_t _NextId{1}; + ur_shared_mutex _Mutex; std::unordered_map _Depot; }; -uint32_t StackDepotPut(StackTrace 
&Stack, HeapType Type) { - switch (Type) { - case HeapType::DeviceUSM: { - static StackDepot TheDepot; - return TheDepot.Put(Stack); - } - case HeapType::HostUSM: { - static StackDepot TheDepot; - return TheDepot.Put(Stack); - } - case HeapType::SharedUSM: { - static StackDepot TheDepot; - return TheDepot.Put(Stack); - } - case HeapType::Local: { - static StackDepot TheDepot; - return TheDepot.Put(Stack); - } - default: - assert(false && "Unknown heap type"); - return 0; - } -} +static StackDepot TheDepot; -StackTrace StackDepotGet(uint32_t Id, HeapType Type) { - switch (Type) { - case HeapType::DeviceUSM: { - static StackDepot TheDepot; - return TheDepot.Get(Id); - } - case HeapType::HostUSM: { - static StackDepot TheDepot; - return TheDepot.Get(Id); - } - case HeapType::SharedUSM: { - static StackDepot TheDepot; - return TheDepot.Get(Id); - } - case HeapType::Local: { - static StackDepot TheDepot; - return TheDepot.Get(Id); - } - default: - assert(false && "Unknown heap type"); - return StackTrace(); - } -} +void StackDepotPut(uint32_t Id, StackTrace &Stack) { TheDepot.Put(Id, Stack); } + +StackTrace StackDepotGet(uint32_t Id) { return TheDepot.Get(Id); } } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp index 4b452f737ccb1..7c35a26575ab2 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp @@ -17,7 +17,8 @@ namespace ur_sanitizer_layer { -enum class HeapType { DeviceUSM, HostUSM, SharedUSM, Local }; +const uint32_t kHeapTypeCount = 4; +enum HeapType { DeviceUSM, HostUSM, SharedUSM, Local }; inline const char *ToString(HeapType Type) { switch (Type) { @@ -34,7 +35,7 @@ inline const char *ToString(HeapType Type) { } } -uint32_t 
StackDepotPut(StackTrace &Stack, HeapType Type); -StackTrace StackDepotGet(uint32_t Id, HeapType Type); +void StackDepotPut(uint32_t Id, StackTrace &Stack); +StackTrace StackDepotGet(uint32_t Id); } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp index 676e753a02d4c..f52dfc63310b5 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp @@ -83,6 +83,7 @@ SourceInfo ParseSymbolizerOutput(const std::string &Output) { void StackTrace::print() const { if (!stack.size()) { UR_LOG_L(getContext()->logger, QUIET, " failed to acquire backtrace"); + return; } unsigned index = 0; From e5a2ceb7b1b8db3032f5797463afff65bbd51a57 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 12 Jun 2025 12:50:36 +0200 Subject: [PATCH 06/26] wip --- libdevice/sanitizer/msan_rtl.cpp | 53 ++++++++++++++--- .../Instrumentation/MemorySanitizer.cpp | 57 ++++++++++++++----- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 56 +++++++++++++++++- .../sanitizer/msan/msan_interceptor.cpp | 28 +++++---- .../layers/sanitizer/msan/msan_shadow.cpp | 44 +++++--------- .../sanitizer_common/sanitizer_options.cpp | 1 + .../sanitizer_common/sanitizer_options.hpp | 1 + 7 files changed, 177 insertions(+), 63 deletions(-) diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 4d92c6953994e..bf6845bde7656 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -31,7 +31,10 @@ constexpr uptr DG2_DEVICE_USM_BEGIN = 0xffff'8000'0000'0000ULL; constexpr uptr DG2_DEVICE_USM_END = 0xffff'ffff'ffff'ffffULL; const __SYCL_CONSTANT__ char __msan_print_shadow[] = - "[kernel] __msan_get_shadow(addr=%p, as=%d) = %p: %02X <%s>\n"; + "[kernel] 
__msan_get_shadow(addr=%p, as=%d) = %p: %02X\n"; + +const __SYCL_CONSTANT__ char __msan_print_origin[] = + "[kernel] __msan_get_origin(addr=%p, as=%d) = %p: %02X\n"; const __SYCL_CONSTANT__ char __msan_print_unsupport_device_type[] = "[kernel] Unsupport device type: %d\n"; @@ -293,6 +296,24 @@ __msan_warning_noreturn(const char __SYCL_CONSTANT__ *file, uint32_t line, __msan_exit(); } +DEVICE_EXTERN_C_NOINLINE void +__msan_warning_with_origin(uint32_t origin, const char __SYCL_CONSTANT__ *file, + uint32_t line, const char __SYCL_CONSTANT__ *func) { + if (!GetMsanLaunchInfo) + return; + __msan_internal_report_save(1, file, line, func, origin); + __msan_exit(); +} + +DEVICE_EXTERN_C_NOINLINE void __msan_warning_with_origin_noreturn( + uint32_t origin, const char __SYCL_CONSTANT__ *file, uint32_t line, + const char __SYCL_CONSTANT__ *func) { + if (!GetMsanLaunchInfo) + return; + __msan_internal_report_save(1, file, line, func, origin); + __msan_exit(); +} + // For mapping detail, ref to // "unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp" DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void * @@ -323,8 +344,7 @@ __msan_get_shadow(uptr addr, uint32_t as, #endif MSAN_DEBUG(__spirv_ocl_printf(__msan_print_shadow, (void *)addr, as, - (void *)shadow_ptr, *(u8 *)shadow_ptr, - func ? 
func : __msan_print_unknown)); + (void *)shadow_ptr, *(u8 *)shadow_ptr)); return (__SYCL_GLOBAL__ void *)shadow_ptr; } @@ -336,7 +356,7 @@ DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, if (!GetMsanLaunchInfo) return nullptr; - // Return clean shadow (0s) by default + uptr aligned_addr = addr & ~3ULL; uptr origin_ptr; #if defined(__LIBDEVICE_PVC__) @@ -345,21 +365,38 @@ DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, origin_ptr = __msan_get_origin_cpu(addr); #else if (LIKELY(GetMsanLaunchInfo->DeviceTy == DeviceType::CPU)) { - origin_ptr = __msan_get_origin_cpu(addr); + origin_ptr = __msan_get_origin_cpu(aligned_addr); } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_PVC) { - origin_ptr = __msan_get_origin_pvc(addr, as); + origin_ptr = __msan_get_origin_pvc(aligned_addr, as); } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { - origin_ptr = __msan_get_origin_dg2(addr, as); + origin_ptr = __msan_get_origin_dg2(aligned_addr, as); } else { + // Return clean shadow (0s) by default origin_ptr = GetMsanLaunchInfo->CleanShadow; MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, GetMsanLaunchInfo->DeviceTy)); } #endif + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_origin, (void *)addr, as, + (void *)origin_ptr, 0)); + return (__SYCL_GLOBAL__ void *)origin_ptr; } +#define MSAN_MAYBE_STORE_ORIGIN(type, size) \ + DEVICE_EXTERN_C_NOINLINE void __msan_maybe_store_origin_##size( \ + type s, uptr addr, uint32_t as, uint32_t o) { \ + if (UNLIKELY(s)) { \ + *(__SYCL_GLOBAL__ u32 *)__msan_get_origin(addr, as) = o; \ + } \ + } + +MSAN_MAYBE_STORE_ORIGIN(u8, 1) +MSAN_MAYBE_STORE_ORIGIN(u16, 2) +MSAN_MAYBE_STORE_ORIGIN(u32, 4) +MSAN_MAYBE_STORE_ORIGIN(u64, 8) + static __SYCL_CONSTANT__ const char __msan_print_memset[] = "[kernel] memset(beg=%p, shadow_beg=%p, shadow_end=%p)\n"; @@ -643,7 +680,7 @@ DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_stack(__SYCL_PRIVATE__ void *ptr, } static 
__SYCL_CONSTANT__ const char __msan_print_private_base[] = - "[kernel] __msan_set_private_base: %llu -> %p\n"; + "[kernel] __msan_set_private_base(sid=%llu): %p\n"; DEVICE_EXTERN_C_NOINLINE void __msan_set_private_base(__SYCL_PRIVATE__ void *ptr) { diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 8013aa27a4403..cb99d628bf1d6 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1437,9 +1437,22 @@ void MemorySanitizer::createUserspaceApi(Module &M, if (TrackOrigins) { StringRef WarningFnName = Recover ? "__msan_warning_with_origin" : "__msan_warning_with_origin_noreturn"; - WarningFn = M.getOrInsertFunction(WarningFnName, - TLI.getAttrList(C, {0}, /*Signed=*/false), - IRB.getVoidTy(), IRB.getInt32Ty()); + if (!TargetTriple.isSPIROrSPIRV()) { + WarningFn = M.getOrInsertFunction( + WarningFnName, TLI.getAttrList(C, {0}, /*Signed=*/false), + IRB.getVoidTy(), IRB.getInt32Ty()); + } else { + // __msan_warning_with_origin[_noreturn]( + // int origin, + // char* file, + // unsigned int line, + // char* func + // ) + WarningFn = M.getOrInsertFunction( + WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty(), + IRB.getInt8PtrTy(kSpirOffloadConstantAS), IRB.getInt32Ty(), + IRB.getInt8PtrTy(kSpirOffloadConstantAS)); + } } else { StringRef WarningFnName = Recover ? 
"__msan_warning" : "__msan_warning_noreturn"; @@ -1518,11 +1531,17 @@ void MemorySanitizer::createUserspaceApi(Module &M, IRB.getInt8PtrTy(kSpirOffloadConstantAS), IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS)); + // __msan_maybe_warning_N( + // intN_t status, + // uptr addr, + // uint32_t as, + // int origin, + // ) FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false), IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IntptrTy, - IRB.getInt32Ty()); + IRB.getInt32Ty(), IRB.getInt32Ty()); } } @@ -2071,7 +2090,9 @@ struct MemorySanitizerVisitor : public InstVisitor { Align CurrentAlignment = Alignment; if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) { Value *IntptrOrigin = originToIntptr(IRB, Origin); - Value *IntptrOriginPtr = IRB.CreatePointerCast(OriginPtr, MS.PtrTy); + Value *IntptrOriginPtr = IRB.CreatePointerCast( + OriginPtr, + !SpirOrSpirv ? MS.PtrTy : IRB.getPtrTy(kSpirOffloadGlobalAS)); for (unsigned i = 0; i < Size / IntptrSize; ++i) { Value *Ptr = i ? 
IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) : IntptrOriginPtr; @@ -2118,11 +2139,19 @@ struct MemorySanitizerVisitor : public InstVisitor { FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex]; Value *ConvertedShadow2 = IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); - if (SpirOrSpirv) - Addr = IRB.CreatePtrToInt(Addr, IRB.getIntPtrTy(DL)); - CallBase *CB = IRB.CreateCall(Fn, {ConvertedShadow2, Addr, Origin}); - CB->addParamAttr(0, Attribute::ZExt); - CB->addParamAttr(2, Attribute::ZExt); + if (!SpirOrSpirv) { + CallBase *CB = IRB.CreateCall(Fn, {ConvertedShadow2, Addr, Origin}); + CB->addParamAttr(0, Attribute::ZExt); + CB->addParamAttr(2, Attribute::ZExt); + } else { + Value *AddrInt = IRB.CreatePtrToInt(Addr, IRB.getIntPtrTy(DL)); + Value *AS = ConstantInt::get(IRB.getInt32Ty(), + Addr->getType()->getPointerAddressSpace()); + CallBase *CB = + IRB.CreateCall(Fn, {ConvertedShadow2, AddrInt, AS, Origin}); + CB->addParamAttr(0, Attribute::ZExt); + CB->addParamAttr(3, Attribute::ZExt); + } } else { Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( @@ -2229,6 +2258,8 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreateCall(MS.WarningFn)->setCannotMerge(); } else { // SPIR or SPIR-V SmallVector Args; + if (MS.TrackOrigins) + Args.push_back(Origin); appendDebugInfoToArgs(IRB, Args); IRB.CreateCall(MS.WarningFn, Args)->setCannotMerge(); } @@ -2258,10 +2289,10 @@ struct MemorySanitizerVisitor : public InstVisitor { CB->addParamAttr(1, Attribute::ZExt); } else { // SPIR or SPIR-V // Pass the pointer of shadow memory to the report function - SmallVector Args = {ConvertedShadow2}; - Args.emplace_back(MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)); + SmallVector Args = { + ConvertedShadow2, + MS.TrackOrigins && Origin ? 
Origin : (Value *)IRB.getInt32(0)}; appendDebugInfoToArgs(IRB, Args); - CallBase *CB = IRB.CreateCall(Fn, Args); CB->addParamAttr(0, Attribute::ZExt); CB->addParamAttr(1, Attribute::ZExt); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 4c72279e28bab..7d3d31d28dd70 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1484,6 +1484,17 @@ ur_result_t UR_APICALL urEnqueueUSMFill( Events.push_back(Event); } + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto MemShadow = DeviceInfo->Shadow->MemToOrigin((uptr)pMem); + + Event = nullptr; + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 0, + nullptr, &Event)); + Events.push_back(Event); + } + if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( hQueue, Events.size(), Events.data(), phEvent)); @@ -1534,12 +1545,24 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); - Event = nullptr; + ur_event_handle_t Event{}; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, (void *)SrcShadow, size, 0, nullptr, &Event)); Events.push_back(Event); } + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto SrcOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pSrc); + const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst); + + ur_event_handle_t Event{}; + UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstOrigin, (void *)SrcOrigin, + size, 0, nullptr, &Event)); + Events.push_back(Event); + } + if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( hQueue, Events.size(), Events.data(), 
phEvent)); @@ -1596,12 +1619,25 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); const char Pattern = 0; - Event = nullptr; + ur_event_handle_t Event{}; UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, width, height, 0, nullptr, &Event)); Events.push_back(Event); } + // FIXME: align to 4 bytes + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto MemOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pMem); + + const char Pattern = 0; + ur_event_handle_t Event{}; + UR_CALL(pfnUSMFill2D(hQueue, (void *)MemOrigin, pitch, 1, &Pattern, width, + height, 0, nullptr, &Event)); + Events.push_back(Event); + } + if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( hQueue, Events.size(), Events.data(), phEvent)); @@ -1660,13 +1696,27 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); - Event = nullptr; + ur_event_handle_t Event{}; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, (void *)SrcShadow, srcPitch, width, height, 0, nullptr, &Event)); Events.push_back(Event); } + // FIXME: align to 4 bytes + { + ur_device_handle_t Device = GetDevice(hQueue); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + const auto SrcOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pSrc); + const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst); + + ur_event_handle_t Event{}; + UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstOrigin, dstPitch, + (void *)SrcOrigin, srcPitch, width, height, 0, + nullptr, &Event)); + Events.push_back(Event); + } + if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( hQueue, Events.size(), Events.data(), phEvent)); diff --git 
a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index ba6efed967802..6f52adce424b1 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -81,6 +81,11 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, Size); + bool IsHostOrSharedUSM = + Type == AllocType::HOST_USM || Type == AllocType::SHARED_USM; + bool DontCheckHostOrSharedUSM = + IsHostOrSharedUSM && !getContext()->Options.MsanCheckHostAndSharedUSM; + // For origin tracking HeapType HeapType; switch (Type) { @@ -98,21 +103,24 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, } StackTrace Stack = GetCurrentBacktrace(); - Origin HeapOrigin = Origin::CreateHeapOrigin(Stack, HeapType); + Origin HeapOrigin = DontCheckHostOrSharedUSM + ? Origin::FromRawId(0) + : Origin::CreateHeapOrigin(Stack, HeapType); // Update shadow memory - auto EnqueuePoison = [&](ur_device_handle_t Device) { - ManagedQueue Queue(Context, Device); - std::shared_ptr DI = getDeviceInfo(Device); - DI->Shadow->EnqueuePoisonShadowWithOrigin(Queue, (uptr)Allocated, Size, - 0xff, HeapOrigin.rawId()); + auto EnqueuePoison = [&](const std::vector &Devices) { + u8 Value = DontCheckHostOrSharedUSM ? 
0 : 0xff; + for (ur_device_handle_t Device : Devices) { + ManagedQueue Queue(Context, Device); + std::shared_ptr DI = getDeviceInfo(Device); + DI->Shadow->EnqueuePoisonShadowWithOrigin(Queue, (uptr)Allocated, Size, + Value, HeapOrigin.rawId()); + } }; if (Device) { // shared/device USM - EnqueuePoison(Device); + EnqueuePoison({Device}); } else { // host USM - for (const auto &[Device, _] : m_DeviceMap) { - EnqueuePoison(Device); - } + EnqueuePoison(ContextInfo->DeviceList); } UR_LOG_L(getContext()->logger, INFO, diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 7ead3cb8c12cb..7690862f183f2 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -224,15 +224,6 @@ ur_result_t MsanShadowMemoryGPU::EnqueueVirtualMemMap( UR_LOG_L(getContext()->logger, DEBUG, "urVirtualMemMap: {} ~ {}", (void *)MappedPtr, (void *)(MappedPtr + PageSize - 1)); - // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize, - EventWaitList.size(), EventWaitList.data(), - OutEvent); - if (URes != UR_RESULT_SUCCESS) { - UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMSet(): {}", URes); - return URes; - } - EventWaitList.clear(); if (OutEvent) { EventWaitList.push_back(*OutEvent); @@ -240,11 +231,6 @@ ur_result_t MsanShadowMemoryGPU::EnqueueVirtualMemMap( VirtualMemMaps[MappedPtr].first = PhysicalMem; } - - // auto AllocInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Ptr); - // if (AllocInfoItOp) { - // VirtualMemMaps[MappedPtr].second.insert((*AllocInfoItOp)->second); - // } } return UR_RESULT_SUCCESS; @@ -280,7 +266,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( EnqueueVirtualMemMap(Queue, ShadowBegin, ShadowEnd, Events, OutEvent)); UR_LOG_L(getContext()->logger, DEBUG, - "EnqueuePoisonShadow(addr={}, count={}, value={})", + 
"EnqueuePoisonShadow(addr={}, size={}, value={})", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value); @@ -291,20 +277,18 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( { uptr OriginBegin = MemToOrigin(Ptr); - uptr OriginEnd = MemToOrigin(Ptr + Size - 1); + uptr OriginEnd = MemToOrigin(Ptr + Size - 1) + sizeof(Origin) - 1; UR_CALL( EnqueueVirtualMemMap(Queue, OriginBegin, OriginEnd, Events, OutEvent)); - if (Origin != 0) { - UR_LOG_L(getContext()->logger, DEBUG, - "EnqueuePoisonOrigin(addr={}, count={}, value={})", - (void *)OriginBegin, OriginEnd - OriginBegin + 1, - (void *)(uptr)Origin); + UR_LOG_L(getContext()->logger, DEBUG, + "EnqueuePoisonOrigin(addr={}, size={}, value={})", + (void *)OriginBegin, OriginEnd - OriginBegin + 1, + (void *)(uptr)Origin); - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( - Queue, (void *)OriginBegin, sizeof(Origin), &Origin, Size, NumEvents, - EventWaitList, OutEvent)); - } + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( + Queue, (void *)OriginBegin, sizeof(Origin), &Origin, + OriginEnd - OriginBegin + 1, NumEvents, EventWaitList, OutEvent)); } return UR_RESULT_SUCCESS; @@ -435,12 +419,14 @@ uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { } uptr MsanShadowMemoryPVC::MemToOrigin(uptr Ptr) { - if (MsanShadowMemoryPVC::isDeviceUSM(Ptr)) { - return Ptr - 0xA000'0000'0000ULL; + uptr AlignedPtr = Ptr & ~3ULL; + if (MsanShadowMemoryPVC::isDeviceUSM(AlignedPtr)) { + return AlignedPtr - 0xA000'0000'0000ULL; } // host/shared USM - return (Ptr & 0xff'ffff'ffffULL) + ((Ptr & 0x8000'0000'0000ULL) >> 7) + - ShadowBegin + 0x0200'0000'0000ULL; + return (AlignedPtr & 0xff'ffff'ffffULL) + + ((AlignedPtr & 0x8000'0000'0000ULL) >> 7) + ShadowBegin + + 0x0200'0000'0000ULL; } uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) { diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.cpp 
b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.cpp index 96daf0f1aeb9c..7ecca4e62387e 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.cpp @@ -47,6 +47,7 @@ void SanitizerOptions::Init(const std::string &EnvName, Parser.ParseBool("detect_leaks", DetectLeaks); Parser.ParseBool("halt_on_error", HaltOnError); Parser.ParseBool("recover", Recover); + Parser.ParseBool("msan_check_host_and_shared_usm", MsanCheckHostAndSharedUSM); Parser.ParseUint64("quarantine_size_mb", MaxQuarantineSizeMB, 0, UINT32_MAX); Parser.ParseUint64("redzone", MinRZSize, 16); diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp index 2f1e2be399edc..9394578ea3054 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp @@ -32,6 +32,7 @@ struct SanitizerOptions { bool DetectLeaks = true; bool HaltOnError = true; bool Recover = false; + bool MsanCheckHostAndSharedUSM = false; void Init(const std::string &EnvName, logger::Logger &Logger); }; From df6a0b7ab9c07adfe48072bf5fada3ecd55e0c23 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 16 Jun 2025 04:38:15 +0200 Subject: [PATCH 07/26] wip --- libdevice/cmake/modules/SYCLLibdevice.cmake | 6 +- libdevice/include/asan_rtl.hpp | 1 + libdevice/include/msan_rtl.hpp | 1 + libdevice/include/sanitizer_defs.hpp | 17 +- .../{group_utils.hpp => sanitizer_utils.hpp} | 46 +++- libdevice/include/spir_global_var.hpp | 5 - libdevice/include/tsan_rtl.hpp | 1 + libdevice/sanitizer/msan_rtl.cpp | 232 ++++++++---------- 8 files changed, 160 insertions(+), 149 deletions(-) rename libdevice/include/{group_utils.hpp => 
sanitizer_utils.hpp} (51%) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c32cbbefe6c4e..9a56ffbb35c26 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -246,7 +246,7 @@ if (NOT MSVC AND UR_SANITIZER_INCLUDE_DIR) include/asan_rtl.hpp include/sanitizer_defs.hpp include/spir_global_var.hpp - include/group_utils.hpp + include/sanitizer_utils.hpp ${sycl-compiler_deps}) set(sanitizer_generic_compile_opts ${compile_opts} @@ -305,7 +305,7 @@ if (NOT MSVC AND UR_SANITIZER_INCLUDE_DIR) include/msan_rtl.hpp include/sanitizer_defs.hpp include/spir_global_var.hpp - include/group_utils.hpp + include/sanitizer_utils.hpp sycl-compiler) set(tsan_obj_deps @@ -314,7 +314,7 @@ if (NOT MSVC AND UR_SANITIZER_INCLUDE_DIR) include/tsan_rtl.hpp include/sanitizer_defs.hpp include/spir_global_var.hpp - include/group_utils.hpp + include/sanitizer_utils.hpp sycl-compiler) endif() diff --git a/libdevice/include/asan_rtl.hpp b/libdevice/include/asan_rtl.hpp index b26604e310018..4d5b2878a1a20 100644 --- a/libdevice/include/asan_rtl.hpp +++ b/libdevice/include/asan_rtl.hpp @@ -8,6 +8,7 @@ #pragma once #include "sanitizer_defs.hpp" +#include "sanitizer_utils.hpp" #include "spir_global_var.hpp" #include diff --git a/libdevice/include/msan_rtl.hpp b/libdevice/include/msan_rtl.hpp index 1c160c3f65a46..b336511feb3b2 100644 --- a/libdevice/include/msan_rtl.hpp +++ b/libdevice/include/msan_rtl.hpp @@ -8,6 +8,7 @@ #pragma once #include "sanitizer_defs.hpp" +#include "sanitizer_utils.hpp" #include "spir_global_var.hpp" // Treat this header as system one to workaround frontend's restriction diff --git a/libdevice/include/sanitizer_defs.hpp b/libdevice/include/sanitizer_defs.hpp index 552ca086fcbe4..302551a56da09 100644 --- a/libdevice/include/sanitizer_defs.hpp +++ b/libdevice/include/sanitizer_defs.hpp @@ -8,8 +8,8 @@ #pragma once #include "atomic.hpp" -#include "group_utils.hpp" 
#include "spir_global_var.hpp" + #include using uptr = uintptr_t; @@ -30,6 +30,11 @@ enum ADDRESS_SPACE : uint32_t { ADDRESS_SPACE_GENERIC = 4, }; +#define __SYCL_GLOBAL__ __attribute__((opencl_global)) +#define __SYCL_LOCAL__ __attribute__((opencl_local)) +#define __SYCL_PRIVATE__ __attribute__((opencl_private)) +#define __SYCL_CONSTANT__ __attribute__((opencl_constant)) + #define LIKELY(x) __builtin_expect(!!(x), 1) #define UNLIKELY(x) __builtin_expect(!!(x), 0) #define NORETURN __declspec(noreturn) @@ -56,14 +61,4 @@ __spirv_ControlBarrier(int32_t Execution, int32_t Memory, extern "C" SYCL_EXTERNAL void __devicelib_exit(); -__SYCL_GLOBAL__ void *ToGlobal(void *ptr) { - return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5); -} -__SYCL_LOCAL__ void *ToLocal(void *ptr) { - return __spirv_GenericCastToPtrExplicit_ToLocal(ptr, 4); -} -__SYCL_PRIVATE__ void *ToPrivate(void *ptr) { - return __spirv_GenericCastToPtrExplicit_ToPrivate(ptr, 7); -} - #endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/include/group_utils.hpp b/libdevice/include/sanitizer_utils.hpp similarity index 51% rename from libdevice/include/group_utils.hpp rename to libdevice/include/sanitizer_utils.hpp index 7f392b3665eac..5d73357806429 100644 --- a/libdevice/include/group_utils.hpp +++ b/libdevice/include/sanitizer_utils.hpp @@ -7,11 +7,12 @@ //===----------------------------------------------------------------------===// #pragma once +#include "sanitizer_defs.hpp" #include "spirv_vars.h" #if defined(__SPIR__) || defined(__SPIRV__) -static inline size_t WorkGroupLinearId() { +size_t WorkGroupLinearId() { return __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * __spirv_BuiltInNumWorkgroups.z + __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z + @@ -19,15 +20,54 @@ static inline size_t WorkGroupLinearId() { } // For GPU device, each sub group is a hardware thread -static inline size_t SubGroupLinearId() { +size_t SubGroupLinearId() { return 
__spirv_BuiltInGlobalLinearId / __spirv_BuiltInSubgroupSize; } -static inline void SubGroupBarrier() { +void SubGroupBarrier() { __spirv_ControlBarrier(__spv::Scope::Subgroup, __spv::Scope::Subgroup, __spv::MemorySemanticsMask::SequentiallyConsistent | __spv::MemorySemanticsMask::CrossWorkgroupMemory | __spv::MemorySemanticsMask::WorkgroupMemory); } +__SYCL_GLOBAL__ void *ToGlobal(void *ptr) { + return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5); +} +__SYCL_LOCAL__ void *ToLocal(void *ptr) { + return __spirv_GenericCastToPtrExplicit_ToLocal(ptr, 4); +} +__SYCL_PRIVATE__ void *ToPrivate(void *ptr) { + return __spirv_GenericCastToPtrExplicit_ToPrivate(ptr, 7); +} + +template SYCL_EXTERNAL T Memset(T ptr, int value, size_t size) { + for (size_t i = 0; i < size; i++) { + ptr[i] = value; + } + return ptr; +} + +template +SYCL_EXTERNAL DstT Memcpy(DstT dst, SrcT src, size_t size) { + for (size_t i = 0; i < size; i++) { + dst[i] = src[i]; + } + return dst; +} + +template +SYCL_EXTERNAL DstT Memmove(DstT dst, SrcT src, size_t size) { + if ((uptr)dst < (uptr)src) { + for (size_t i = 0; i < size; i++) { + dst[i] = src[i]; + } + } else { + for (size_t i = size; i > 0; i--) { + dst[i - 1] = src[i - 1]; + } + } + return dst; +} + #endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/include/spir_global_var.hpp b/libdevice/include/spir_global_var.hpp index 33c63add82c7d..694e37517bf5b 100644 --- a/libdevice/include/spir_global_var.hpp +++ b/libdevice/include/spir_global_var.hpp @@ -38,11 +38,6 @@ class T val; }; -#define __SYCL_GLOBAL__ __attribute__((opencl_global)) -#define __SYCL_LOCAL__ __attribute__((opencl_local)) -#define __SYCL_PRIVATE__ __attribute__((opencl_private)) -#define __SYCL_CONSTANT__ __attribute__((opencl_constant)) - #ifndef SPIR_GLOBAL_VAR #ifdef __SYCL_DEVICE_ONLY__ #define SPIR_GLOBAL_VAR __attribute__((sycl_global_var)) diff --git a/libdevice/include/tsan_rtl.hpp b/libdevice/include/tsan_rtl.hpp index 7e17366cc33c6..0775d0f58fa9b 100644 --- 
a/libdevice/include/tsan_rtl.hpp +++ b/libdevice/include/tsan_rtl.hpp @@ -8,6 +8,7 @@ #pragma once #include "sanitizer_defs.hpp" +#include "sanitizer_utils.hpp" #include "spir_global_var.hpp" #include "tsan/tsan_libdevice.hpp" diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index bf6845bde7656..f4f965d041c45 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -9,6 +9,8 @@ #include "include/msan_rtl.hpp" #include "atomic.hpp" #include "device.h" +#include "include/sanitizer_defs.hpp" +#include "include/sanitizer_utils.hpp" #include "msan/msan_libdevice.hpp" #include "spirv_vars.h" @@ -144,11 +146,9 @@ void __msan_report_error(const uint32_t size, __msan_internal_report_save(size, file, line, func, origin); } -inline uptr __msan_get_shadow_cpu(uptr addr) { - return addr ^ 0x500000000000ULL; -} +inline uptr MemToShadow_CPU(uptr addr) { return addr ^ 0x500000000000ULL; } -inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) { +inline uptr MemToShadow_DG2(uptr addr, uint32_t as) { if (as == ADDRESS_SPACE_GENERIC) { ConvertGenericPointer(addr, as); } @@ -166,7 +166,7 @@ inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) { } } -inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) { +inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { if (as == ADDRESS_SPACE_GENERIC) { ConvertGenericPointer(addr, as); } @@ -218,13 +218,39 @@ inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) { return GetMsanLaunchInfo->CleanShadow; } -inline uptr __msan_get_origin_cpu(uptr addr) { - return addr ^ 0x500000000000ULL; +inline uptr MemToShadow(uptr addr, uint32_t as) { + // Return clean shadow (0s) by default + uptr shadow_ptr; + +#if defined(__LIBDEVICE_PVC__) + shadow_ptr = MemToShadow_PVC(addr, as); +#elif defined(__LIBDEVICE_CPU__) + shadow_ptr = MemToShadow_CPU(addr); +#else + if (LIKELY(GetMsanLaunchInfo->DeviceTy == DeviceType::CPU)) { + shadow_ptr = MemToShadow_CPU(addr); + } else if 
(GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_PVC) { + shadow_ptr = MemToShadow_PVC(addr, as); + } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { + shadow_ptr = MemToShadow_DG2(addr, as); + } else { + shadow_ptr = GetMsanLaunchInfo->CleanShadow; + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, + GetMsanLaunchInfo->DeviceTy)); + } +#endif + + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_shadow, (void *)addr, as, + (void *)shadow_ptr, *(u8 *)shadow_ptr)); + + return shadow_ptr; } -inline uptr __msan_get_origin_dg2(uptr addr, uint32_t as) { return 0; } +inline uptr MemToOrigin_CPU(uptr addr) { return addr ^ 0x500000000000ULL; } + +inline uptr MemToOrigin_DG2(uptr addr, uint32_t as) { return 0; } -inline uptr __msan_get_origin_pvc(uptr addr, uint32_t as) { +inline uptr MemToOrigin_PVC(uptr addr, uint32_t as) { if (as == ADDRESS_SPACE_GENERIC) { ConvertGenericPointer(addr, as); } @@ -244,6 +270,35 @@ inline uptr __msan_get_origin_pvc(uptr addr, uint32_t as) { return GetMsanLaunchInfo->CleanShadow; } +inline uptr MemToOrigin(uptr addr, uint32_t as) { + uptr aligned_addr = addr & ~3ULL; + uptr origin_ptr; + +#if defined(__LIBDEVICE_PVC__) + origin_ptr = MemToOrigin_PVC(addr, as); +#elif defined(__LIBDEVICE_CPU__) + origin_ptr = MemToOrigin_CPU(addr); +#else + if (LIKELY(GetMsanLaunchInfo->DeviceTy == DeviceType::CPU)) { + origin_ptr = MemToOrigin_CPU(aligned_addr); + } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_PVC) { + origin_ptr = MemToOrigin_PVC(aligned_addr, as); + } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { + origin_ptr = MemToOrigin_DG2(aligned_addr, as); + } else { + // Return clean shadow (0s) by default + origin_ptr = GetMsanLaunchInfo->CleanShadow; + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, + GetMsanLaunchInfo->DeviceTy)); + } +#endif + + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_origin, (void *)addr, as, + (void *)origin_ptr, 0)); + + return origin_ptr; +} + inline 
void __msan_exit() { if (!GetMsanLaunchInfo->IsRecover) __devicelib_exit(); @@ -259,6 +314,29 @@ void GroupAsyncCopy(uptr Dest, uptr Src, size_t NumElements, size_t Stride) { } } +static __SYCL_CONSTANT__ const char __msan_print_memcpy[] = + "[kernel] memcpy(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; + +template +inline void +CopyShadowAndOrigin(__attribute__((address_space(dst_as))) char *dst, + __attribute__((address_space(src_as))) char *src, + size_t size) {} + +static __SYCL_CONSTANT__ const char __msan_print_memmove[] = + "[kernel] memmove(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; + +template +inline void +MoveShadowAndOrigin(__attribute__((address_space(dst_as))) char *dst, + __attribute__((address_space(src_as))) char *src, + size_t size) {} + +inline void UnpoisonShadow(uptr addr, uint32_t as, size_t size) { + auto *shadow_ptr = (__SYCL_GLOBAL__ char *)MemToShadow(addr, as); + Memset(shadow_ptr, 0, size); +} + } // namespace #define MSAN_MAYBE_WARNING(type, size) \ @@ -321,32 +399,7 @@ __msan_get_shadow(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *func = nullptr) { if (!GetMsanLaunchInfo) return nullptr; - - // Return clean shadow (0s) by default - uptr shadow_ptr; - -#if defined(__LIBDEVICE_PVC__) - shadow_ptr = __msan_get_shadow_pvc(addr, as); -#elif defined(__LIBDEVICE_CPU__) - shadow_ptr = __msan_get_shadow_cpu(addr); -#else - if (LIKELY(GetMsanLaunchInfo->DeviceTy == DeviceType::CPU)) { - shadow_ptr = __msan_get_shadow_cpu(addr); - } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_PVC) { - shadow_ptr = __msan_get_shadow_pvc(addr, as); - } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { - shadow_ptr = __msan_get_shadow_dg2(addr, as); - } else { - shadow_ptr = GetMsanLaunchInfo->CleanShadow; - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, - GetMsanLaunchInfo->DeviceTy)); - } -#endif - - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_shadow, (void *)addr, as, - (void 
*)shadow_ptr, *(u8 *)shadow_ptr)); - - return (__SYCL_GLOBAL__ void *)shadow_ptr; + return (__SYCL_GLOBAL__ void *)MemToShadow(addr, as); } // For mapping detail, ref to @@ -355,33 +408,7 @@ DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, uint32_t as) { if (!GetMsanLaunchInfo) return nullptr; - - uptr aligned_addr = addr & ~3ULL; - uptr origin_ptr; - -#if defined(__LIBDEVICE_PVC__) - origin_ptr = __msan_get_origin_pvc(addr, as); -#elif defined(__LIBDEVICE_CPU__) - origin_ptr = __msan_get_origin_cpu(addr); -#else - if (LIKELY(GetMsanLaunchInfo->DeviceTy == DeviceType::CPU)) { - origin_ptr = __msan_get_origin_cpu(aligned_addr); - } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_PVC) { - origin_ptr = __msan_get_origin_pvc(aligned_addr, as); - } else if (GetMsanLaunchInfo->DeviceTy == DeviceType::GPU_DG2) { - origin_ptr = __msan_get_origin_dg2(aligned_addr, as); - } else { - // Return clean shadow (0s) by default - origin_ptr = GetMsanLaunchInfo->CleanShadow; - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type, - GetMsanLaunchInfo->DeviceTy)); - } -#endif - - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_origin, (void *)addr, as, - (void *)origin_ptr, 0)); - - return (__SYCL_GLOBAL__ void *)origin_ptr; + return (__SYCL_GLOBAL__ void *)MemToOrigin(addr, as); } #define MSAN_MAYBE_STORE_ORIGIN(type, size) \ @@ -405,15 +432,9 @@ static __SYCL_CONSTANT__ const char __msan_print_memset[] = __attribute__((address_space(as))) void *__msan_memset_p##as( \ __attribute__((address_space(as))) char *dest, int val, size_t size) { \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_memset")); \ - uptr shadow = (uptr)__msan_get_shadow((uptr)dest, as); \ - for (size_t i = 0; i < size; i++) { \ - dest[i] = val; \ - ((__SYCL_GLOBAL__ char *)shadow)[i] = 0; \ - } \ - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_memset, dest, shadow, \ - shadow + size - 1)); \ - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_memset")); 
\ - return dest; \ + auto res = Memset(dest, val, size); \ + UnpoisonShadow((uptr)dest, as, size); \ + return res; \ } MSAN_MEMSET(0) @@ -421,34 +442,16 @@ MSAN_MEMSET(1) MSAN_MEMSET(3) MSAN_MEMSET(4) -static __SYCL_CONSTANT__ const char __msan_print_memmove[] = - "[kernel] memmove(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; - #define MSAN_MEMMOVE_BASE(dst_as, src_as) \ DEVICE_EXTERN_C_NOINLINE __attribute__((address_space(dst_as))) void \ *__msan_memmove_p##dst_as##_p##src_as( \ __attribute__((address_space(dst_as))) char *dest, \ __attribute__((address_space(src_as))) char *src, size_t size) { \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_memmove")); \ - uptr dest_shadow = (uptr)__msan_get_shadow((uptr)dest, dst_as); \ - uptr src_shadow = (uptr)__msan_get_shadow((uptr)src, src_as); \ - if ((uptr)dest > (uptr)src) { \ - for (size_t i = size - 1; i < size; i--) { \ - dest[i] = src[i]; \ - ((__SYCL_GLOBAL__ char *)dest_shadow)[i] = \ - ((__SYCL_GLOBAL__ char *)src_shadow)[i]; \ - } \ - } else { \ - for (size_t i = 0; i < size; i++) { \ - dest[i] = src[i]; \ - ((__SYCL_GLOBAL__ char *)dest_shadow)[i] = \ - ((__SYCL_GLOBAL__ char *)src_shadow)[i]; \ - } \ - } \ - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_memmove, dest, src, \ - dest_shadow, src_shadow, size)); \ + auto res = Memmove(dest, src, size); \ + MoveShadowAndOrigin(dest, src, size); \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_memmove")); \ - return dest; \ + return res; \ } #define MSAN_MEMMOVE(dst_as) \ @@ -463,26 +466,16 @@ MSAN_MEMMOVE(1) MSAN_MEMMOVE(3) MSAN_MEMMOVE(4) -static __SYCL_CONSTANT__ const char __msan_print_memcpy[] = - "[kernel] memcpy(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; - #define MSAN_MEMCPY_BASE(dst_as, src_as) \ DEVICE_EXTERN_C_NOINLINE __attribute__((address_space(dst_as))) void \ *__msan_memcpy_p##dst_as##_p##src_as( \ __attribute__((address_space(dst_as))) char *dest, \ __attribute__((address_space(src_as))) char 
*src, size_t size) { \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_memcpy")); \ - uptr dest_shadow = (uptr)__msan_get_shadow((uptr)dest, dst_as); \ - uptr src_shadow = (uptr)__msan_get_shadow((uptr)src, src_as); \ - for (size_t i = 0; i < size; i++) { \ - dest[i] = src[i]; \ - ((__SYCL_GLOBAL__ char *)dest_shadow)[i] = \ - ((__SYCL_GLOBAL__ char *)src_shadow)[i]; \ - } \ - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_memmove, dest, src, \ - dest_shadow, src_shadow, size)); \ + auto res = Memcpy(dest, src, size); \ + CopyShadowAndOrigin(dest, src, size); \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_memcpy")); \ - return dest; \ + return res; \ } #define MSAN_MEMCPY(dst_as) \ @@ -516,13 +509,10 @@ DEVICE_EXTERN_C_NOINLINE void __msan_poison_shadow_static_local(uptr ptr, MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_poison_shadow_static_local")); - auto shadow_address = (uptr)__msan_get_shadow(ptr, ADDRESS_SPACE_LOCAL); + auto shadow_address = MemToShadow(ptr, ADDRESS_SPACE_LOCAL); if (shadow_address == GetMsanLaunchInfo->CleanShadow) return; - - for (size_t i = 0; i < size; ++i) { - ((__SYCL_GLOBAL__ u8 *)shadow_address)[i] = 0xff; - } + Memset((__SYCL_GLOBAL__ char *)shadow_address, size, 0xff); MSAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_local, shadow_address, shadow_address + size, 0xff)); @@ -539,17 +529,9 @@ DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_shadow_static_local(uptr ptr, 0) { if (!GetMsanLaunchInfo || GetMsanLaunchInfo->LocalShadowOffset == 0) return; - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_unpoison_shadow_static_local")); - - auto shadow_address = (uptr)__msan_get_shadow(ptr, ADDRESS_SPACE_LOCAL); - for (size_t i = 0; i < size; ++i) { - ((__SYCL_GLOBAL__ u8 *)shadow_address)[i] = 0; - } - - MSAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_local, shadow_address, - shadow_address + size, 0)); + UnpoisonShadow(ptr, ADDRESS_SPACE_LOCAL, size); 
MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_unpoison_shadow_static_local")); } @@ -646,14 +628,12 @@ DEVICE_EXTERN_C_NOINLINE void __msan_poison_stack(__SYCL_PRIVATE__ void *ptr, MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_poison_stack")); - auto shadow_address = - (uptr)__msan_get_shadow((uptr)ptr, ADDRESS_SPACE_PRIVATE); + auto shadow_address = MemToShadow((uptr)ptr, ADDRESS_SPACE_PRIVATE); MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow_private, (void *)shadow_address, (void *)(shadow_address + size), 0xff)); - for (size_t i = 0; i < size; i++) - ((__SYCL_GLOBAL__ u8 *)shadow_address)[i] = 0xff; + Memset((__SYCL_GLOBAL__ char *)shadow_address, 0xff, size); MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_poison_stack")); } @@ -666,14 +646,12 @@ DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_stack(__SYCL_PRIVATE__ void *ptr, MSAN_DEBUG( __spirv_ocl_printf(__msan_print_func_beg, "__msan_unpoison_stack")); - auto shadow_address = - (uptr)__msan_get_shadow((uptr)ptr, ADDRESS_SPACE_PRIVATE); + auto shadow_address = MemToShadow((uptr)ptr, ADDRESS_SPACE_PRIVATE); MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow_private, (void *)shadow_address, (void *)(shadow_address + size), 0x0)); - for (size_t i = 0; i < size; i++) - ((__SYCL_GLOBAL__ u8 *)shadow_address)[i] = 0; + Memset((__SYCL_GLOBAL__ char *)shadow_address, 0, size); MSAN_DEBUG( __spirv_ocl_printf(__msan_print_func_end, "__msan_unpoison_stack")); From ed84d2c7a0964f44c9272952df49d57bce3f0205 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 16 Jun 2025 04:53:44 +0200 Subject: [PATCH 08/26] wip --- .../layers/sanitizer/asan/asan_shadow.cpp | 7 ++-- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 39 ++++++------------- .../sanitizer/msan/msan_interceptor.cpp | 4 +- .../layers/sanitizer/msan/msan_libdevice.hpp | 2 + .../layers/sanitizer/msan/msan_shadow.cpp | 4 +- .../sanitizer_common/sanitizer_utils.cpp | 9 ----- .../sanitizer_common/sanitizer_utils.hpp 
| 12 ++++-- .../layers/sanitizer/tsan/tsan_shadow.cpp | 8 ++-- 8 files changed, 36 insertions(+), 49 deletions(-) diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 01b9366e33118..8c8e26639f4a3 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -210,7 +210,8 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)MappedPtr, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize); + URes = + EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, (char)0, PageSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); @@ -260,7 +261,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, // Initialize shadow memory ur_result_t URes = EnqueueUSMBlockingSet(Queue, (void *)LocalShadowOffset, - 0, RequiredShadowSize); + (char)0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); @@ -311,7 +312,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, // Initialize shadow memory ur_result_t URes = EnqueueUSMBlockingSet( - Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize); + Queue, (void *)PrivateShadowOffset, (char)0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)PrivateShadowOffset)); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 7d3d31d28dd70..682384958479b 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ 
-12,6 +12,7 @@ */ #include "msan_ddi.hpp" +#include "msan/msan_libdevice.hpp" #include "msan_interceptor.hpp" #include "sanitizer_common/sanitizer_utils.hpp" #include "ur_sanitizer_layer.hpp" @@ -1478,22 +1479,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill( const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); - Event = nullptr; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 0, + ur_event_handle_t Event{}; + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, (char)0, size, 0, nullptr, &Event)); Events.push_back(Event); } - { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto MemShadow = DeviceInfo->Shadow->MemToOrigin((uptr)pMem); - - Event = nullptr; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 0, - nullptr, &Event)); - Events.push_back(Event); - } + // NOTE: No need to set origin, since its shadow is clean if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( @@ -1554,12 +1546,16 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( { ur_device_handle_t Device = GetDevice(hQueue); const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto SrcOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pSrc); + const auto SrcOriginBegin = DeviceInfo->Shadow->MemToOrigin((uptr)pSrc); + const auto SrcOriginEnd = + DeviceInfo->Shadow->MemToOrigin((uptr)pSrc + size - 1) + + MSAN_ORIGIN_GRANULARITY; const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst); ur_event_handle_t Event{}; - UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstOrigin, (void *)SrcOrigin, - size, 0, nullptr, &Event)); + UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstOrigin, + (void *)SrcOriginBegin, SrcOriginEnd - SrcOriginBegin, + 0, nullptr, &Event)); Events.push_back(Event); } @@ -1625,18 +1621,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( 
Events.push_back(Event); } - // FIXME: align to 4 bytes - { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto MemOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pMem); - - const char Pattern = 0; - ur_event_handle_t Event{}; - UR_CALL(pfnUSMFill2D(hQueue, (void *)MemOrigin, pitch, 1, &Pattern, width, - height, 0, nullptr, &Event)); - Events.push_back(Event); - } + // NOTE: No need to set origin, since its shadow is clean if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 6f52adce424b1..5016593ea4185 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -499,8 +499,8 @@ ur_result_t MsanInterceptor::prepareLaunch( ContextInfo->CleanShadowSize, (void **)&LaunchInfo.Data.Host.CleanShadow)); UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow, - 0, ContextInfo->CleanShadowSize, 0, nullptr, - nullptr)); + (char)0, ContextInfo->CleanShadowSize, 0, + nullptr, nullptr)); if (LaunchInfo.LocalWorkSize.empty()) { LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index 1f46f9c3e4769..6d2f6917f5d2f 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -19,6 +19,8 @@ namespace ur_sanitizer_layer { #endif // !__SPIR__ && !__SPIRV__ +constexpr unsigned MSAN_ORIGIN_GRANULARITY = 4U; + struct MsanErrorReport { int Flag = 0; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp 
b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 7690862f183f2..c5a5fa8f80fe8 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -343,7 +343,7 @@ ur_result_t MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, // Initialize shadow memory ur_result_t URes = EnqueueUSMBlockingSet(Queue, (void *)LocalShadowOffset, - 0, RequiredShadowSize); + (char)0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); @@ -391,7 +391,7 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, // Initialize shadow memory ur_result_t URes = EnqueueUSMBlockingSet( - Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize); + Queue, (void *)PrivateShadowOffset, (char)0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)PrivateShadowOffset)); diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index b7e7fe41e48b5..e8740966c3648 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -13,7 +13,6 @@ #include "sanitizer_utils.hpp" #include "sanitizer_common/sanitizer_common.hpp" -#include "ur_sanitizer_layer.hpp" namespace ur_sanitizer_layer { @@ -249,14 +248,6 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, return Size; } -ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, - char Value, size_t Size, uint32_t NumEvents, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { - return getContext()->urDdiTable.Enqueue.pfnUSMFill( - Queue, Ptr, 1, &Value, Size, 
NumEvents, EventWaitList, OutEvent); -} - void PrintUrBuildLogIfError(ur_result_t Result, ur_program_handle_t Program, ur_device_handle_t *Devices, size_t NumDevices) { if (Result == UR_RESULT_SUCCESS || diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index b46544737d16d..5ff0434e80847 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -15,6 +15,7 @@ #include "sanitizer_libdevice.hpp" #include "ur_api.h" +#include "ur_sanitizer_layer.hpp" #include #include @@ -59,11 +60,16 @@ size_t GetKernelPrivateMemorySize(ur_kernel_handle_t Kernel, size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); +template ur_result_t -EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, - size_t Size, uint32_t NumEvents = 0, +EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, T Value, size_t Size, + uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, - ur_event_handle_t *OutEvent = nullptr); + ur_event_handle_t *OutEvent = nullptr) { + assert(Size % sizeof(T) == 0); + return getContext()->urDdiTable.Enqueue.pfnUSMFill( + Queue, Ptr, sizeof(T), &Value, Size, NumEvents, EventWaitList, OutEvent); +} void PrintUrBuildLogIfError(ur_result_t Result, ur_program_handle_t Program, ur_device_handle_t *Devices, size_t NumDevices); diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp index 495de6bf48e63..518d31cba72fb 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp @@ -165,7 +165,8 @@ ur_result_t ShadowMemoryGPU::CleanShadow(ur_queue_handle_t Queue, 
uptr Ptr, (void *)MappedPtr, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize); + URes = + EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, (char)0, PageSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); @@ -177,8 +178,9 @@ ur_result_t ShadowMemoryGPU::CleanShadow(ur_queue_handle_t Queue, uptr Ptr, } } - auto URes = EnqueueUSMBlockingSet( - Queue, (void *)Begin, 0, Size / kShadowCell * kShadowCnt * kShadowSize); + auto URes = + EnqueueUSMBlockingSet(Queue, (void *)Begin, (char)0, + Size / kShadowCell * kShadowCnt * kShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); return URes; From 6e3773586a92078a9b3a0950d842c9a8e9a82ea4 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 16 Jun 2025 13:40:55 +0200 Subject: [PATCH 09/26] wip --- libdevice/atomic.hpp | 9 +++ libdevice/sanitizer/msan_rtl.cpp | 71 +++++++++++++------ .../loader/layers/sanitizer/msan/msan_ddi.cpp | 30 +++++--- .../layers/sanitizer/msan/msan_shadow.cpp | 10 ++- .../layers/sanitizer/msan/msan_shadow.hpp | 4 +- 5 files changed, 84 insertions(+), 40 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index ea3e725ebe48d..508cb1e5964d9 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -75,6 +75,9 @@ extern DEVICE_EXTERNAL void __spirv_AtomicStore(int *, int, int, int) noexcept; extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, int, int, int) noexcept; +extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL unsigned int *, int, + int, int) noexcept; + /// Atomically set the value in *Ptr with Desired if and only if it is Expected /// Return the value which already was in *Ptr static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired, @@ -113,4 +116,10 @@ static inline int atomicAdd(SPIR_GLOBAL int *Ptr, int V) { V); } +static inline 
int atomicAdd(SPIR_GLOBAL unsigned int *Ptr, int V) { + return __spirv_AtomicIAdd(Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, + V); +} + #endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index f4f965d041c45..2dfb725c72905 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -248,7 +248,9 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { inline uptr MemToOrigin_CPU(uptr addr) { return addr ^ 0x500000000000ULL; } -inline uptr MemToOrigin_DG2(uptr addr, uint32_t as) { return 0; } +inline uptr MemToOrigin_DG2(uptr addr, uint32_t as) { + return GetMsanLaunchInfo->CleanShadow; +} inline uptr MemToOrigin_PVC(uptr addr, uint32_t as) { if (as == ADDRESS_SPACE_GENERIC) { @@ -317,20 +319,45 @@ void GroupAsyncCopy(uptr Dest, uptr Src, size_t NumElements, size_t Stride) { static __SYCL_CONSTANT__ const char __msan_print_memcpy[] = "[kernel] memcpy(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; -template -inline void -CopyShadowAndOrigin(__attribute__((address_space(dst_as))) char *dst, - __attribute__((address_space(src_as))) char *src, - size_t size) {} +// FIXME: The original implemention only copies the origin of poisoned memories +void CopyOrigin(uptr dst, uint32_t dst_as, uptr src, uint32_t src_as, + uptr size) { + auto *src_beg = (__SYCL_GLOBAL__ char *)MemToOrigin(src, src_as); + auto *src_end = (__SYCL_GLOBAL__ char *)MemToOrigin(src + size - 1, src_as) + + MSAN_ORIGIN_GRANULARITY; + auto *dst_beg = (__SYCL_GLOBAL__ char *)MemToOrigin(dst, dst_as); + Memcpy(dst_beg, src_beg, src_end - src_beg); +} + +inline void CopyShadowAndOrigin(uptr dst, uint32_t dst_as, uptr src, + uint32_t src_as, size_t size) { + auto *shadow_dst = (__SYCL_GLOBAL__ char *)MemToShadow(dst, dst_as); + auto *shadow_src = (__SYCL_GLOBAL__ char *)MemToShadow(src, src_as); + Memcpy(shadow_dst, shadow_src, size); + CopyOrigin(dst, dst_as, src, src_as, 
size); +} static __SYCL_CONSTANT__ const char __msan_print_memmove[] = "[kernel] memmove(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; -template -inline void -MoveShadowAndOrigin(__attribute__((address_space(dst_as))) char *dst, - __attribute__((address_space(src_as))) char *src, - size_t size) {} +// FIXME: The original implemention only moves the origin of poisoned memories +void MoveOrigin(uptr dst, uint32_t dst_as, uptr src, uint32_t src_as, + uptr size) { + auto *src_beg = (__SYCL_GLOBAL__ char *)MemToOrigin(src, src_as); + auto *src_end = (__SYCL_GLOBAL__ char *)MemToOrigin(src + size - 1, src_as) + + MSAN_ORIGIN_GRANULARITY; + auto *dst_beg = (__SYCL_GLOBAL__ char *)MemToOrigin(dst, dst_as); + Memmove(dst_beg, src_beg, src_end - src_beg); +} + +inline void MoveShadowAndOrigin(uptr dst, uint32_t dst_as, uptr src, + uint32_t src_as, size_t size) { + auto *shadow_dst = (__SYCL_GLOBAL__ char *)MemToShadow(dst, dst_as); + auto *shadow_src = (__SYCL_GLOBAL__ char *)MemToShadow(src, src_as); + // MoveOrigin transfers origins by refering to their shadows + MoveOrigin(dst, dst_as, src, src_as, size); + Memmove(shadow_dst, shadow_src, size); +} inline void UnpoisonShadow(uptr addr, uint32_t as, size_t size) { auto *shadow_ptr = (__SYCL_GLOBAL__ char *)MemToShadow(addr, as); @@ -449,7 +476,7 @@ MSAN_MEMSET(4) __attribute__((address_space(src_as))) char *src, size_t size) { \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_memmove")); \ auto res = Memmove(dest, src, size); \ - MoveShadowAndOrigin(dest, src, size); \ + MoveShadowAndOrigin((uptr)dest, dst_as, (uptr)src, src_as, size); \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_memmove")); \ return res; \ } @@ -473,7 +500,7 @@ MSAN_MEMMOVE(4) __attribute__((address_space(src_as))) char *src, size_t size) { \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_memcpy")); \ auto res = Memcpy(dest, src, size); \ - CopyShadowAndOrigin(dest, src, size); \ + 
CopyShadowAndOrigin((uptr)dest, dst_as, (uptr)src, src_as, size); \ MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_memcpy")); \ return res; \ } @@ -711,16 +738,14 @@ __msan_unpoison_strided_copy(uptr dest, uint32_t dest_as, uptr src, "__msan_unpoison_strided_copy")); } -DEVICE_EXTERN_C_NOINLINE void -__msan_set_alloca_origin_no_descr(void *a, uptr size, - __SYCL_GLOBAL__ u32 *id_ptr) { - // SetAllocaOrigin(a, size, id_ptr, nullptr, GET_CALLER_PC()); -} +// FIXME: not support origin tracking for private memory +DEVICE_EXTERN_C_NOINLINE void __msan_set_alloca_origin_no_descr( + [[maybe_unused]] void *a, [[maybe_unused]] uptr size, + [[maybe_unused]] __SYCL_GLOBAL__ u32 *id_ptr) {} -DEVICE_EXTERN_C_NOINLINE void -__msan_set_alloca_origin_with_descr(void *a, uptr size, - __SYCL_GLOBAL__ u32 *id_ptr, char *descr) { - // SetAllocaOrigin(a, size, id_ptr, descr, GET_CALLER_PC()); -} +// FIXME: not support origin tracking for private memory +DEVICE_EXTERN_C_NOINLINE void __msan_set_alloca_origin_with_descr( + [[maybe_unused]] void *a, [[maybe_unused]] uptr size, + [[maybe_unused]] __SYCL_GLOBAL__ u32 *id_ptr, char *descr) {} #endif // __SPIR__ || __SPIRV__ diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 682384958479b..edf1094555e11 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1688,18 +1688,32 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( Events.push_back(Event); } - // FIXME: align to 4 bytes { ur_device_handle_t Device = GetDevice(hQueue); const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto SrcOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pSrc); - const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst); - ur_event_handle_t Event{}; - UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstOrigin, dstPitch, - 
(void *)SrcOrigin, srcPitch, width, height, 0, - nullptr, &Event)); - Events.push_back(Event); + auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; + + std::vector WaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + WaitEvents[i] = phEventWaitList[i]; + } + + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + ur_event_handle_t Event{}; + const auto DstOrigin = + DeviceInfo->Shadow->MemToOrigin((uptr)pDst + dstPitch * HeightIndex); + const auto SrcOrigin = + DeviceInfo->Shadow->MemToOrigin((uptr)pSrc + srcPitch * HeightIndex); + const auto SrcOriginEnd = + DeviceInfo->Shadow->MemToOrigin((uptr)pSrc + srcPitch * HeightIndex + + width - 1) + + MSAN_ORIGIN_GRANULARITY; + pfnUSMMemcpy(hQueue, false, (void *)DstOrigin, (void *)SrcOrigin, + SrcOriginEnd - SrcOrigin, WaitEvents.size(), + WaitEvents.data(), &Event); + Events.push_back(Event); + } } if (phEvent) { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index c5a5fa8f80fe8..bc680e67c68b6 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -195,7 +195,7 @@ ur_result_t MsanShadowMemoryGPU::Destory() { } ur_result_t MsanShadowMemoryGPU::EnqueueVirtualMemMap( - ur_queue_handle_t Queue, uptr VirtualBegin, uptr VirtualEnd, + uptr VirtualBegin, uptr VirtualEnd, std::vector &EventWaitList, ur_event_handle_t *OutEvent) { const size_t PageSize = GetVirtualMemGranularity(Context, Device); @@ -262,8 +262,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( uptr ShadowEnd = MemToShadow(Ptr + Size - 1); assert(ShadowBegin <= ShadowEnd); - UR_CALL( - EnqueueVirtualMemMap(Queue, ShadowBegin, ShadowEnd, Events, OutEvent)); + UR_CALL(EnqueueVirtualMemMap(ShadowBegin, ShadowEnd, Events, OutEvent)); UR_LOG_L(getContext()->logger, DEBUG, 
"EnqueuePoisonShadow(addr={}, size={}, value={})", @@ -278,8 +277,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( { uptr OriginBegin = MemToOrigin(Ptr); uptr OriginEnd = MemToOrigin(Ptr + Size - 1) + sizeof(Origin) - 1; - UR_CALL( - EnqueueVirtualMemMap(Queue, OriginBegin, OriginEnd, Events, OutEvent)); + UR_CALL(EnqueueVirtualMemMap(OriginBegin, OriginEnd, Events, OutEvent)); UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonOrigin(addr={}, size={}, value={})", @@ -419,7 +417,7 @@ uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { } uptr MsanShadowMemoryPVC::MemToOrigin(uptr Ptr) { - uptr AlignedPtr = Ptr & ~3ULL; + uptr AlignedPtr = RoundDownTo(Ptr, MSAN_ORIGIN_GRANULARITY); if (MsanShadowMemoryPVC::isDeviceUSM(AlignedPtr)) { return AlignedPtr - 0xA000'0000'0000ULL; } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp index 0e7318b086561..c6f94c3725f02 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -157,8 +157,7 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { private: ur_result_t - EnqueueVirtualMemMap(ur_queue_handle_t Queue, uptr VirtualBegin, - uptr VirtualEnd, + EnqueueVirtualMemMap(uptr VirtualBegin, uptr VirtualEnd, std::vector &EventWaitList, ur_event_handle_t *OutEvent); @@ -169,7 +168,6 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { ur_mutex VirtualMemMapsMutex; uptr LocalShadowOffset = 0; - uptr PrivateShadowOffset = 0; }; From 3564364e339f587cd0de6128e0fa541ebed6400b Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 16 Jun 2025 13:50:11 +0200 Subject: [PATCH 10/26] clean code --- unified-runtime/source/loader/CMakeLists.txt | 22 +++++++++---------- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 12 +++++----- .../sanitizer/msan/msan_interceptor.cpp | 11 ++++------ 
.../layers/sanitizer/msan/msan_shadow.cpp | 8 +++---- .../layers/sanitizer/msan/msan_shadow.hpp | 4 ++-- 5 files changed, 27 insertions(+), 30 deletions(-) diff --git a/unified-runtime/source/loader/CMakeLists.txt b/unified-runtime/source/loader/CMakeLists.txt index 4928399462e09..8e1e9aa9d3604 100644 --- a/unified-runtime/source/loader/CMakeLists.txt +++ b/unified-runtime/source/loader/CMakeLists.txt @@ -170,6 +170,17 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_buffer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_ddi.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_ddi.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_interceptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_interceptor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_libdevice.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_report.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_report.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_shadow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_shadow.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp @@ -184,17 +195,6 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp - 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_buffer.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_ddi.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_ddi.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_interceptor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_interceptor.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_libdevice.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_report.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_report.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_shadow.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/tsan/tsan_shadow.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index edf1094555e11..db5f170f2e8fe 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1479,7 +1479,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill( const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); - ur_event_handle_t Event{}; + ur_event_handle_t Event = nullptr; UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, (char)0, size, 0, nullptr, &Event)); Events.push_back(Event); @@ -1537,7 +1537,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); - ur_event_handle_t Event{}; + ur_event_handle_t Event = nullptr; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, (void 
*)SrcShadow, size, 0, nullptr, &Event)); Events.push_back(Event); @@ -1552,7 +1552,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( MSAN_ORIGIN_GRANULARITY; const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst); - ur_event_handle_t Event{}; + ur_event_handle_t Event = nullptr; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstOrigin, (void *)SrcOriginBegin, SrcOriginEnd - SrcOriginBegin, 0, nullptr, &Event)); @@ -1615,7 +1615,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); const char Pattern = 0; - ur_event_handle_t Event{}; + ur_event_handle_t Event = nullptr; UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, width, height, 0, nullptr, &Event)); Events.push_back(Event); @@ -1681,7 +1681,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); - ur_event_handle_t Event{}; + ur_event_handle_t Event = nullptr; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, (void *)SrcShadow, srcPitch, width, height, 0, nullptr, &Event)); @@ -1700,7 +1700,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { - ur_event_handle_t Event{}; + ur_event_handle_t Event = nullptr; const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst + dstPitch * HeightIndex); const auto SrcOrigin = diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 5016593ea4185..1505364212713 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -52,16 +52,13 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, auto ContextInfo = getContextInfo(Context); 
std::shared_ptr DI = Device ? getDeviceInfo(Device) : nullptr; - // Origin tracking needs alignment at leat is 4 - constexpr uint32_t MSAN_ORIGIN_TRACKING_GRANULARITY = 4; - uint32_t Alignment = Properties ? Properties->align : 4; // Alignment must be zero or a power-of-two if (0 != (Alignment & (Alignment - 1))) { return UR_RESULT_ERROR_INVALID_ARGUMENT; } - if (Alignment < MSAN_ORIGIN_TRACKING_GRANULARITY) { - Alignment = MSAN_ORIGIN_TRACKING_GRANULARITY; + if (Alignment < MSAN_ORIGIN_GRANULARITY) { + Alignment = MSAN_ORIGIN_GRANULARITY; } ur_usm_desc_t NewProperties; @@ -306,9 +303,9 @@ MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { // Only support device global USM if (DeviceInfo->Type == DeviceType::CPU || (DeviceInfo->Type == DeviceType::GPU_PVC && - MsanShadowMemoryPVC::isDeviceUSM(GVInfo.Addr)) || + MsanShadowMemoryPVC::IsDeviceUSM(GVInfo.Addr)) || (DeviceInfo->Type == DeviceType::GPU_DG2 && - MsanShadowMemoryDG2::isDeviceUSM(GVInfo.Addr))) { + MsanShadowMemoryDG2::IsDeviceUSM(GVInfo.Addr))) { UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr, GVInfo.Size, 0)); ContextInfo->CleanShadowSize = diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index bc680e67c68b6..7feba341ce692 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -408,7 +408,7 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, } uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { - if (MsanShadowMemoryPVC::isDeviceUSM(Ptr)) { + if (MsanShadowMemoryPVC::IsDeviceUSM(Ptr)) { return Ptr - 0x5000'0000'0000ULL; } // host/shared USM @@ -418,7 +418,7 @@ uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { uptr MsanShadowMemoryPVC::MemToOrigin(uptr Ptr) { uptr AlignedPtr = RoundDownTo(Ptr, MSAN_ORIGIN_GRANULARITY); - if 
(MsanShadowMemoryPVC::isDeviceUSM(AlignedPtr)) { + if (MsanShadowMemoryPVC::IsDeviceUSM(AlignedPtr)) { return AlignedPtr - 0xA000'0000'0000ULL; } // host/shared USM @@ -428,7 +428,7 @@ uptr MsanShadowMemoryPVC::MemToOrigin(uptr Ptr) { } uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) { - assert(MsanShadowMemoryDG2::isDeviceUSM(Ptr) && "Ptr must be device USM"); + assert(MsanShadowMemoryDG2::IsDeviceUSM(Ptr) && "Ptr must be device USM"); if (Ptr < ShadowBegin) { return Ptr + (ShadowBegin - 0xffff'8000'0000'0000ULL); } else { @@ -437,7 +437,7 @@ uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) { } uptr MsanShadowMemoryDG2::MemToOrigin(uptr Ptr) { - assert(MsanShadowMemoryDG2::isDeviceUSM(Ptr) && "Ptr must be device USM"); + assert(MsanShadowMemoryDG2::IsDeviceUSM(Ptr) && "Ptr must be device USM"); if (Ptr < ShadowBegin) { return Ptr + (ShadowBegin - 0xffff'8000'0000'0000ULL); } else { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp index c6f94c3725f02..feff5df9747e0 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -195,7 +195,7 @@ struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU { MsanShadowMemoryPVC(ur_context_handle_t Context, ur_device_handle_t Device) : MsanShadowMemoryGPU(Context, Device) {} - static bool isDeviceUSM(uptr Ptr) { return Ptr >> 52 == 0xff0; } + static bool IsDeviceUSM(uptr Ptr) { return Ptr >> 52 == 0xff0; } uptr MemToShadow(uptr Ptr) override; uptr MemToOrigin(uptr Ptr) override; @@ -219,7 +219,7 @@ struct MsanShadowMemoryDG2 final : public MsanShadowMemoryGPU { MsanShadowMemoryDG2(ur_context_handle_t Context, ur_device_handle_t Device) : MsanShadowMemoryGPU(Context, Device) {} - static bool isDeviceUSM(uptr Ptr) { return Ptr >> 48; } + static bool IsDeviceUSM(uptr Ptr) { return Ptr >> 48; } uptr MemToShadow(uptr Ptr) 
override; uptr MemToOrigin(uptr Ptr) override; From dd0acc2dc646991b501d3f023da6ece51fbc7bf2 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Tue, 17 Jun 2025 08:37:59 +0200 Subject: [PATCH 11/26] wip cpu --- libdevice/include/sanitizer_defs.hpp | 1 - libdevice/sanitizer/msan_rtl.cpp | 2 +- .../layers/sanitizer/msan/msan_shadow.cpp | 125 +++++++++++------- 3 files changed, 78 insertions(+), 50 deletions(-) diff --git a/libdevice/include/sanitizer_defs.hpp b/libdevice/include/sanitizer_defs.hpp index 302551a56da09..cdc84cacbdda3 100644 --- a/libdevice/include/sanitizer_defs.hpp +++ b/libdevice/include/sanitizer_defs.hpp @@ -9,7 +9,6 @@ #include "atomic.hpp" #include "spir_global_var.hpp" - #include using uptr = uintptr_t; diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 2dfb725c72905..5ca8d3e88b390 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -246,7 +246,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { return shadow_ptr; } -inline uptr MemToOrigin_CPU(uptr addr) { return addr ^ 0x500000000000ULL; } +inline uptr MemToOrigin_CPU(uptr addr) { return addr ^ 0x100000000000ULL; } inline uptr MemToOrigin_DG2(uptr addr, uint32_t as) { return GetMsanLaunchInfo->CleanShadow; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 7feba341ce692..63aafac885e63 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -20,14 +20,45 @@ namespace ur_sanitizer_layer { namespace msan { -#define CPU_SHADOW1_BEGIN 0x010000000000ULL -#define CPU_SHADOW1_END 0x100000000000ULL -#define CPU_SHADOW2_BEGIN 0x200000000000ULL -#define CPU_SHADOW2_END 0x300000000000ULL -#define CPU_SHADOW3_BEGIN 0x500000000000ULL -#define CPU_SHADOW3_END 0x510000000000ULL - -#define CPU_SHADOW_MASK 0x500000000000ULL 
+namespace { + +// +// The CPU part of shadow mapping is based on llvm/compiler-rt/lib/msan/msan.h +// +struct MappingDesc { + uptr start; + uptr end; + enum Type { + INVALID = 1, + ALLOCATOR = 2, + APP = 4, + SHADOW = 8, + ORIGIN = 16, + } type; + const char *name; +}; + +const MappingDesc kMemoryLayout[] = { + {0x000000000000ULL, 0x010000000000ULL, MappingDesc::APP, "app-1"}, + {0x010000000000ULL, 0x100000000000ULL, MappingDesc::SHADOW, "shadow-2"}, + {0x100000000000ULL, 0x110000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x110000000000ULL, 0x200000000000ULL, MappingDesc::ORIGIN, "origin-2"}, + {0x200000000000ULL, 0x300000000000ULL, MappingDesc::SHADOW, "shadow-3"}, + {0x300000000000ULL, 0x400000000000ULL, MappingDesc::ORIGIN, "origin-3"}, + {0x400000000000ULL, 0x500000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x500000000000ULL, 0x510000000000ULL, MappingDesc::SHADOW, "shadow-1"}, + {0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"}, + {0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"}, + {0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x700000000000ULL, 0x740000000000ULL, MappingDesc::ALLOCATOR, "allocator"}, + {0x740000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}}; + +const uptr kMemoryLayoutSize = sizeof(kMemoryLayout) / sizeof(kMemoryLayout[0]); + +#define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL) +#define SHADOW_TO_ORIGIN(mem) (((uptr)(mem)) + 0x100000000000ULL) + +} // namespace std::shared_ptr GetMsanShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device, @@ -52,29 +83,30 @@ GetMsanShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device, ur_result_t MsanShadowMemoryCPU::Setup() { static ur_result_t Result = [this]() { - if (MmapFixedNoReserve(CPU_SHADOW1_BEGIN, - CPU_SHADOW1_END - CPU_SHADOW1_BEGIN) == 0) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - if (ProtectMemoryRange(CPU_SHADOW1_END, - CPU_SHADOW2_BEGIN - CPU_SHADOW1_END) 
== 0) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - if (MmapFixedNoReserve(CPU_SHADOW2_BEGIN, - CPU_SHADOW2_END - CPU_SHADOW2_BEGIN) == 0) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - if (ProtectMemoryRange(CPU_SHADOW2_END, - CPU_SHADOW3_BEGIN - CPU_SHADOW2_END) == 0) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - if (MmapFixedNoReserve(CPU_SHADOW3_BEGIN, - CPU_SHADOW3_END - CPU_SHADOW3_BEGIN) == 0) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + for (unsigned i = 0; i < kMemoryLayoutSize; ++i) { + uptr Start = kMemoryLayout[i].start; + uptr End = kMemoryLayout[i].end; + uptr Size = End - Start; + MappingDesc::Type Type = kMemoryLayout[i].type; + bool InitOrigins = true; + + bool IsMap = Type == MappingDesc::SHADOW || + (InitOrigins && Type == MappingDesc::ORIGIN); + bool IsProtect = Type == MappingDesc::INVALID || + (!InitOrigins && Type == MappingDesc::ORIGIN); + + if (IsMap) { + if (MmapFixedNoReserve(Start, Size) == 0) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + DontCoredumpRange(Start, Size); + } + if (IsProtect) { + if (ProtectMemoryRange(Start, Size) == 0) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + } } - ShadowBegin = CPU_SHADOW1_BEGIN; - ShadowEnd = CPU_SHADOW3_END; - DontCoredumpRange(ShadowBegin, ShadowEnd - ShadowBegin); return UR_RESULT_SUCCESS; }(); return Result; @@ -85,20 +117,19 @@ ur_result_t MsanShadowMemoryCPU::Destory() { return UR_RESULT_SUCCESS; } static ur_result_t Result = [this]() { - if (!Munmap(CPU_SHADOW1_BEGIN, CPU_SHADOW1_END - CPU_SHADOW1_BEGIN)) { - return UR_RESULT_ERROR_UNKNOWN; - } - if (!Munmap(CPU_SHADOW1_END, CPU_SHADOW2_BEGIN - CPU_SHADOW1_END)) { - return UR_RESULT_ERROR_UNKNOWN; - } - if (!Munmap(CPU_SHADOW2_BEGIN, CPU_SHADOW2_END - CPU_SHADOW2_BEGIN) == 0) { - return UR_RESULT_ERROR_UNKNOWN; - } - if (!Munmap(CPU_SHADOW2_END, CPU_SHADOW3_BEGIN - CPU_SHADOW2_END)) { - return UR_RESULT_ERROR_UNKNOWN; - } - if (!Munmap(CPU_SHADOW3_BEGIN, CPU_SHADOW3_END - CPU_SHADOW3_BEGIN) == 0) { - 
return UR_RESULT_ERROR_UNKNOWN; + for (unsigned i = 0; i < kMemoryLayoutSize; ++i) { + uptr Start = kMemoryLayout[i].start; + uptr End = kMemoryLayout[i].end; + uptr Size = End - Start; + MappingDesc::Type Type = kMemoryLayout[i].type; + bool InitOrigins = true; + bool IsMap = Type == MappingDesc::SHADOW || + (InitOrigins && Type == MappingDesc::ORIGIN); + if (IsMap) { + if (Munmap(Start, Size)) { + return UR_RESULT_ERROR_UNKNOWN; + } + } } ShadowBegin = ShadowEnd = 0; return UR_RESULT_SUCCESS; @@ -106,12 +137,10 @@ ur_result_t MsanShadowMemoryCPU::Destory() { return Result; } -uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) { - return Ptr ^ CPU_SHADOW_MASK; -} +uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) { return MEM_TO_SHADOW(Ptr); } uptr MsanShadowMemoryCPU::MemToOrigin(uptr Ptr) { - return Ptr ^ CPU_SHADOW_MASK; + return SHADOW_TO_ORIGIN(Ptr); } ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( From c87dc800d1ee2407d98d702bb74f75cdb4f80e3c Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 18 Jun 2025 10:05:18 +0200 Subject: [PATCH 12/26] enable origin track is optional --- clang/lib/Driver/SanitizerArgs.cpp | 11 +++-- libdevice/sanitizer/msan_rtl.cpp | 15 +----- .../SPIRVSanitizerCommonUtils.h | 11 +++++ .../Instrumentation/MemorySanitizer.cpp | 47 ++++++++++++------- .../origin-tracking/check_host_usm.cpp | 29 ++++++++++++ .../sanitizer/msan/msan_interceptor.cpp | 18 ++++--- .../sanitizer/msan/msan_interceptor.hpp | 6 ++- .../sanitizer_common/sanitizer_common.hpp | 12 +++++ .../sanitizer_common/sanitizer_options.hpp | 2 +- 9 files changed, 106 insertions(+), 45 deletions(-) create mode 100644 sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index ff0812294ad9d..6347f78cdb0b7 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -1280,10 +1280,13 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const 
llvm::opt::ArgList &Args, CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-msan-poison-stack-with-call=1"); - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-msan-track-origins=1"); - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-msan-print-stack-names=0"); + if (MsanTrackOrigins) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString("-msan-track-origins=" + + Twine(MsanTrackOrigins))); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-msan-print-stack-names=0"); + } } else if (Sanitizers.has(SanitizerKind::Thread)) { CmdArgs.push_back("-fsanitize=thread"); // The tsan function entry/exit builtins are used to record stack diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 5ca8d3e88b390..a7f5e892bd2dd 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -421,9 +421,8 @@ DEVICE_EXTERN_C_NOINLINE void __msan_warning_with_origin_noreturn( // For mapping detail, ref to // "unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp" -DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void * -__msan_get_shadow(uptr addr, uint32_t as, - const char __SYCL_CONSTANT__ *func = nullptr) { +DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_shadow(uptr addr, + uint32_t as) { if (!GetMsanLaunchInfo) return nullptr; return (__SYCL_GLOBAL__ void *)MemToShadow(addr, as); @@ -738,14 +737,4 @@ __msan_unpoison_strided_copy(uptr dest, uint32_t dest_as, uptr src, "__msan_unpoison_strided_copy")); } -// FIXME: not support origin tracking for private memory -DEVICE_EXTERN_C_NOINLINE void __msan_set_alloca_origin_no_descr( - [[maybe_unused]] void *a, [[maybe_unused]] uptr size, - [[maybe_unused]] __SYCL_GLOBAL__ u32 *id_ptr) {} - -// FIXME: not support origin tracking for private memory -DEVICE_EXTERN_C_NOINLINE void __msan_set_alloca_origin_with_descr( - [[maybe_unused]] void *a, [[maybe_unused]] uptr size, - [[maybe_unused]] __SYCL_GLOBAL__ u32 *id_ptr, char *descr) {} - #endif // 
__SPIR__ || __SPIRV__ diff --git a/llvm/include/llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h b/llvm/include/llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h index f6edabcc3bbf3..ee7b43c98ddef 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h @@ -26,6 +26,17 @@ constexpr unsigned kSpirOffloadGenericAS = 4; TargetExtType *getTargetExtType(Type *Ty); bool isJointMatrixAccess(Value *V); + +// Sync with sanitizer_common/sanitizer_common.hpp +enum SanitizedKernelFlags : uint32_t { + NO_CHECK = 0, + CHECK_GLOBALS = 1U << 1, + CHECK_LOCALS = 1U << 2, + CHECK_PRIVATES = 1U << 3, + CHECK_GENERICS = 1U << 4, + MSAN_TRACK_ORIGINS = 1U << 5, +}; + } // namespace llvm #endif // LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index cb99d628bf1d6..10aef84d6cf06 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -787,7 +787,7 @@ class MemorySanitizerOnSpirv { Int32Ty = Type::getInt32Ty(C); } - bool instrumentModule(); + bool instrumentModule(int TrackOrigins); void beforeInstrumentFunction(Function &F, Instruction *FnPrologueEnd); void afterInstrumentFunction(Function &F); @@ -799,7 +799,7 @@ class MemorySanitizerOnSpirv { void instrumentGlobalVariables(); void instrumentStaticLocalMemory(); void instrumentDynamicLocalMemory(Function &F); - void instrumentKernelsMetadata(); + void instrumentKernelsMetadata(int TrackOrigins); void instrumentPrivateArguments(Function &F, Instruction *FnPrologueEnd); void instrumentPrivateBase(Function &F); @@ -1174,17 +1174,15 @@ void MemorySanitizerOnSpirv::instrumentPrivateArguments( // Instrument __MsanKernelMetadata, which records information of sanitized // kernel -void 
MemorySanitizerOnSpirv::instrumentKernelsMetadata() { +void MemorySanitizerOnSpirv::instrumentKernelsMetadata(int TrackOrigins) { SmallVector SpirKernelsMetadata; // SpirKernelsMetadata only saves fixed kernels, and is described by // following structure: // uptr unmangled_kernel_name // uptr unmangled_kernel_name_size - // uptr check_local_memory - // uptr check_private_memory - StructType *StructTy = - StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy); + // uptr sanitized_flags + StructType *StructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy); for (Function &F : M) { if (F.getCallingConv() != CallingConv::SPIR_KERNEL) continue; @@ -1196,11 +1194,21 @@ void MemorySanitizerOnSpirv::instrumentKernelsMetadata() { auto KernelName = F.getName(); auto *KernelNameGV = getOrCreateGlobalString("__msan_kernel", KernelName, kSpirOffloadConstantAS); + + uintptr_t SanitizerFlags = 0; + SanitizerFlags |= ClSpirOffloadLocals ? SanitizedKernelFlags::CHECK_LOCALS + : SanitizedKernelFlags::NO_CHECK; + SanitizerFlags |= ClSpirOffloadPrivates + ? SanitizedKernelFlags::CHECK_PRIVATES + : SanitizedKernelFlags::NO_CHECK; + SanitizerFlags |= TrackOrigins != 0 + ? 
SanitizedKernelFlags::MSAN_TRACK_ORIGINS + : SanitizedKernelFlags::NO_CHECK; + SpirKernelsMetadata.emplace_back(ConstantStruct::get( StructTy, ConstantExpr::getPointerCast(KernelNameGV, IntptrTy), ConstantInt::get(IntptrTy, KernelName.size()), - ConstantInt::get(IntptrTy, ClSpirOffloadLocals), - ConstantInt::get(IntptrTy, ClSpirOffloadPrivates))); + ConstantInt::get(IntptrTy, SanitizerFlags))); } // Create global variable to record spirv kernels' information @@ -1246,14 +1254,14 @@ void MemorySanitizerOnSpirv::initializeRetVecMap(Function *F) { KernelToRetVecMap[F] = std::move(RetVec); } -bool MemorySanitizerOnSpirv::instrumentModule() { +bool MemorySanitizerOnSpirv::instrumentModule(int TrackOrigins) { if (!IsSPIRV) return false; initializeCallbacks(); instrumentGlobalVariables(); instrumentStaticLocalMemory(); - instrumentKernelsMetadata(); + instrumentKernelsMetadata(TrackOrigins); return true; } @@ -1291,7 +1299,7 @@ PreservedAnalyses MemorySanitizerPass::run(Module &M, } MemorySanitizerOnSpirv MsanSpirv(M); - Modified |= MsanSpirv.instrumentModule(); + Modified |= MsanSpirv.instrumentModule(Options.TrackOrigins); auto &FAM = AM.getResult(M).getManager(); for (Function &F : M) { @@ -1336,8 +1344,11 @@ void MemorySanitizerPass::printPipeline( static GlobalVariable *createPrivateConstGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); + bool SpirOrSpirv = Triple(M.getTargetTriple()).isSPIROrSPIRV(); return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true, - GlobalValue::PrivateLinkage, StrConst, ""); + GlobalValue::PrivateLinkage, StrConst, "", nullptr, + llvm::GlobalValue::NotThreadLocal, + SpirOrSpirv ? 
kSpirOffloadConstantAS : 0); } template @@ -1548,10 +1559,9 @@ void MemorySanitizer::createUserspaceApi(Module &M, MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction("__msan_set_alloca_origin_with_descr", IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy, PtrTy); - MsanSetAllocaOriginNoDescriptionFn = M.getOrInsertFunction( - "__msan_set_alloca_origin_no_descr", IRB.getVoidTy(), PtrTy, IntptrTy, - TargetTriple.isSPIROrSPIRV() ? PointerType::get(*C, kSpirOffloadGlobalAS) - : PtrTy); + MsanSetAllocaOriginNoDescriptionFn = + M.getOrInsertFunction("__msan_set_alloca_origin_no_descr", + IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy); MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(), PtrTy, IntptrTy); } @@ -6637,7 +6647,8 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign()); } - if (PoisonStack && MS.TrackOrigins) { + // FIXME: Not support track origins on private memory yet + if (PoisonStack && MS.TrackOrigins && !SpirOrSpirv) { Value *Idptr = getLocalVarIdptr(I); if (ClPrintStackNames) { Value *Descr = getLocalVarDescription(I); diff --git a/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp b/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp new file mode 100644 index 0000000000000..fef31f5da6803 --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp @@ -0,0 +1,29 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -O0 -g -o %t2.out +// RUN: %{run} not %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -O1 -g -o %t2.out +// RUN: %{run} not %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -O2 -g -o %t3.out +// RUN: %{run} not %t3.out 2>&1 | FileCheck %s + +#include +#include + +__attribute__((noinline)) int foo(int data1) { return data1; } + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_host(2, Q); + + Q.submit([&](sycl::handler &h) { + 
h.single_task([=]() { array[0] = foo(array[1]); }); + }); + Q.wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel}}> + // CHECK: #0 {{.*}} {{.*check_call.cpp}}:[[@LINE-6]] + + sycl::free(array, Q); + return 0; +} diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 1505364212713..c80023dec526e 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -244,15 +244,17 @@ ur_result_t MsanInterceptor::registerSpirKernels(ur_program_handle_t Program) { std::string KernelName = std::string(KernelNameV.begin(), KernelNameV.end()); + bool CheckLocals = SKI.Flags & SanitizedKernelFlags::CHECK_LOCALS; + bool CheckPrivates = SKI.Flags & SanitizedKernelFlags::CHECK_PRIVATES; + bool TrackOrigins = SKI.Flags & SanitizedKernelFlags::MSAN_TRACK_ORIGINS; UR_LOG_L(getContext()->logger, INFO, "SpirKernel(name='{}', isInstrumented={}, " - "checkLocals={}, checkPrivates={})", - KernelName, true, (bool)SKI.CheckLocals, - (bool)SKI.CheckPrivates); + "checkLocals={}, checkPrivates={}, trackOrigins={})", + KernelName, true, CheckLocals, CheckPrivates, TrackOrigins); - PI->KernelMetadataMap[KernelName] = ProgramInfo::KernelMetada{ - (bool)SKI.CheckLocals, (bool)SKI.CheckPrivates}; + PI->KernelMetadataMap[KernelName] = + ProgramInfo::KernelMetada{CheckLocals, CheckPrivates, TrackOrigins}; } UR_LOG_L(getContext()->logger, INFO, "Number of sanitized kernel: {}", PI->KernelMetadataMap.size()); @@ -408,6 +410,7 @@ KernelInfo &MsanInterceptor::getOrCreateKernelInfo(ur_kernel_handle_t Kernel) { auto &KM = PI->getKernelMetadata(Kernel); KI->IsCheckLocals = KM.CheckLocals; KI->IsCheckPrivates = KM.CheckPrivates; + KI->IsTrackOrigins = KM.TrackOrigins; } std::scoped_lock Guard(m_KernelMapMutex); @@ -457,9 +460,10 @@ ur_result_t 
MsanInterceptor::prepareLaunch( auto &KernelInfo = getOrCreateKernelInfo(Kernel); UR_LOG_L(getContext()->logger, INFO, "KernelInfo {} (Name=<{}>, IsInstrumented={}, " - "IsCheckLocals={}, IsCheckPrivates={})", + "CheckLocals={}, CheckPrivates={}, TrackOrigins={})", (void *)Kernel, GetKernelName(Kernel), KernelInfo.IsInstrumented, - KernelInfo.IsCheckLocals, KernelInfo.IsCheckPrivates); + KernelInfo.IsCheckLocals, KernelInfo.IsCheckPrivates, + KernelInfo.IsTrackOrigins); std::shared_lock Guard(KernelInfo.Mutex); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index 04627de11d1ea..34c95f6cb4853 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -83,6 +83,8 @@ struct KernelInfo { bool IsCheckLocals = true; // check private memory bool IsCheckPrivates = true; + // track origins + bool IsTrackOrigins = false; // lock this mutex if following fields are accessed ur_shared_mutex Mutex; @@ -111,6 +113,7 @@ struct ProgramInfo { struct KernelMetada { bool CheckLocals; bool CheckPrivates; + bool TrackOrigins; }; // Program is built only once, so we don't need to lock it @@ -250,8 +253,7 @@ struct DeviceGlobalInfo { struct SpirKernelInfo { uptr KernelName; uptr Size; - uptr CheckLocals; - uptr CheckPrivates; + uptr Flags; }; class MsanInterceptor { diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp index dbd66bb0fd78b..c4854d96ad2e2 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp @@ -147,4 +147,16 @@ void *GetMemFunctionPointer(const char *); std::string DemangleName(const 
std::string &name); +// ================================================================ + +// Sync with llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h +enum SanitizedKernelFlags : uint32_t { + NO_CHECK = 0, + CHECK_GLOBALS = 1U << 1, + CHECK_LOCALS = 1U << 2, + CHECK_PRIVATES = 1U << 3, + CHECK_GENERICS = 1U << 4, + MSAN_TRACK_ORIGINS = 1U << 5, +}; + } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp index 9394578ea3054..328bdc05b2171 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_options.hpp @@ -32,7 +32,7 @@ struct SanitizerOptions { bool DetectLeaks = true; bool HaltOnError = true; bool Recover = false; - bool MsanCheckHostAndSharedUSM = false; + bool MsanCheckHostAndSharedUSM = true; void Init(const std::string &EnvName, logger::Logger &Logger); }; From 0aea1c23728f8ecaaf97dcd370d0c6c50481e415 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 18 Jun 2025 10:17:43 +0200 Subject: [PATCH 13/26] wip --- libdevice/include/sanitizer_utils.hpp | 18 ++++++++-------- .../Instrumentation/MemorySanitizer.cpp | 4 +++- sycl/test-e2e/MemorySanitizer/check_call.cpp | 7 +++++-- sycl/test-e2e/MemorySanitizer/check_usm.cpp | 21 ++----------------- .../origin-tracking/check_host_usm.cpp | 16 ++++++++------ .../layers/sanitizer/msan/msan_shadow.cpp | 18 ++++++++++++++-- 6 files changed, 45 insertions(+), 39 deletions(-) diff --git a/libdevice/include/sanitizer_utils.hpp b/libdevice/include/sanitizer_utils.hpp index 5d73357806429..e0899fb5b9cbb 100644 --- a/libdevice/include/sanitizer_utils.hpp +++ b/libdevice/include/sanitizer_utils.hpp @@ -12,7 +12,7 @@ #if defined(__SPIR__) || defined(__SPIRV__) -size_t WorkGroupLinearId() { +inline size_t WorkGroupLinearId() { 
return __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * __spirv_BuiltInNumWorkgroups.z + __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z + @@ -20,28 +20,28 @@ size_t WorkGroupLinearId() { } // For GPU device, each sub group is a hardware thread -size_t SubGroupLinearId() { +inline size_t SubGroupLinearId() { return __spirv_BuiltInGlobalLinearId / __spirv_BuiltInSubgroupSize; } -void SubGroupBarrier() { +inline void SubGroupBarrier() { __spirv_ControlBarrier(__spv::Scope::Subgroup, __spv::Scope::Subgroup, __spv::MemorySemanticsMask::SequentiallyConsistent | __spv::MemorySemanticsMask::CrossWorkgroupMemory | __spv::MemorySemanticsMask::WorkgroupMemory); } -__SYCL_GLOBAL__ void *ToGlobal(void *ptr) { +inline __SYCL_GLOBAL__ void *ToGlobal(void *ptr) { return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5); } -__SYCL_LOCAL__ void *ToLocal(void *ptr) { +inline __SYCL_LOCAL__ void *ToLocal(void *ptr) { return __spirv_GenericCastToPtrExplicit_ToLocal(ptr, 4); } -__SYCL_PRIVATE__ void *ToPrivate(void *ptr) { +inline __SYCL_PRIVATE__ void *ToPrivate(void *ptr) { return __spirv_GenericCastToPtrExplicit_ToPrivate(ptr, 7); } -template SYCL_EXTERNAL T Memset(T ptr, int value, size_t size) { +template T Memset(T ptr, int value, size_t size) { for (size_t i = 0; i < size; i++) { ptr[i] = value; } @@ -49,7 +49,7 @@ template SYCL_EXTERNAL T Memset(T ptr, int value, size_t size) { } template -SYCL_EXTERNAL DstT Memcpy(DstT dst, SrcT src, size_t size) { +DstT Memcpy(DstT dst, SrcT src, size_t size) { for (size_t i = 0; i < size; i++) { dst[i] = src[i]; } @@ -57,7 +57,7 @@ SYCL_EXTERNAL DstT Memcpy(DstT dst, SrcT src, size_t size) { } template -SYCL_EXTERNAL DstT Memmove(DstT dst, SrcT src, size_t size) { +DstT Memmove(DstT dst, SrcT src, size_t size) { if ((uptr)dst < (uptr)src) { for (size_t i = 0; i < size; i++) { dst[i] = src[i]; diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 10aef84d6cf06..8b908757b13c0 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2616,8 +2616,10 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) { Type *IntptrTy = ptrToIntPtrType(Addr->getType()); Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy); + if (uint64_t AndMask = MS.MapParams->AndMask) OffsetLong = IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask)); + if (uint64_t XorMask = MS.MapParams->XorMask) OffsetLong = IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask)); return OffsetLong; @@ -6622,7 +6624,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *getLocalVarIdptr(AllocaInst &I) { ConstantInt *IntConst = - ConstantInt::get(Type::getInt32Ty(I.getContext()), 0); + ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0); return new GlobalVariable(*F.getParent(), IntConst->getType(), /*isConstant=*/false, GlobalValue::PrivateLinkage, IntConst); diff --git a/sycl/test-e2e/MemorySanitizer/check_call.cpp b/sycl/test-e2e/MemorySanitizer/check_call.cpp index d7d5aea09cd7d..aa2a608027876 100644 --- a/sycl/test-e2e/MemorySanitizer/check_call.cpp +++ b/sycl/test-e2e/MemorySanitizer/check_call.cpp @@ -9,14 +9,17 @@ #include #include -__attribute__((noinline)) int foo(int data1) { return data1; } +__attribute__((noinline)) long long foo(int data1, long long data2) { + return data1 + data2; +} int main() { sycl::queue Q; auto *array = sycl::malloc_device(2, Q); Q.submit([&](sycl::handler &h) { - h.single_task([=]() { array[0] = foo(array[1]); }); + h.single_task( + [=]() { array[0] = foo(array[0], array[1]); }); }); Q.wait(); // CHECK-NOT: [kernel] diff --git a/sycl/test-e2e/MemorySanitizer/check_usm.cpp b/sycl/test-e2e/MemorySanitizer/check_usm.cpp index be84e59b1ab0c..1542fad0ee1d6 100644 --- 
a/sycl/test-e2e/MemorySanitizer/check_usm.cpp +++ b/sycl/test-e2e/MemorySanitizer/check_usm.cpp @@ -30,23 +30,7 @@ void check_memset(sycl::queue &Q) { // CHECK-NOT: use-of-uninitialized-value // CHECK: PASS -void check_memcpy1(sycl::queue &Q) { - std::cout << "check_memcpy1" << std::endl; - auto *source = sycl::malloc_host(2, Q); - auto *array = sycl::malloc_device(2, Q); - // FIXME: We don't support shadow propagation on host/shared usm - auto ev1 = Q.memcpy(array, source, 2 * sizeof(int)); - auto ev2 = Q.single_task(ev1, [=]() { array[0] = foo(array[0], array[1]); }); - Q.wait(); - sycl::free(array, Q); - sycl::free(source, Q); - std::cout << "PASS" << std::endl; -} -// CHECK-LABEL: check_memcpy1 -// CHECK-NOT: use-of-uninitialized-value -// CHECK: PASS - -void check_memcpy2(sycl::queue &Q) { +void check_memcpy(sycl::queue &Q) { std::cout << "check_memcpy2" << std::endl; auto *source = sycl::malloc_device(2, Q); auto *array = sycl::malloc_device(2, Q); @@ -65,7 +49,6 @@ void check_memcpy2(sycl::queue &Q) { int main() { sycl::queue Q; check_memset(Q); - check_memcpy1(Q); - check_memcpy2(Q); + check_memcpy(Q); return 0; } diff --git a/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp b/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp index fef31f5da6803..b577fb0aad246 100644 --- a/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp +++ b/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp @@ -1,28 +1,32 @@ // REQUIRES: linux, cpu || (gpu && level_zero) -// RUN: %{build} %device_msan_flags -O0 -g -o %t2.out +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O0 -g -o %t2.out // RUN: %{run} not %t2.out 2>&1 | FileCheck %s -// RUN: %{build} %device_msan_flags -O1 -g -o %t2.out +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O1 -g -o %t2.out // RUN: %{run} not %t2.out 2>&1 | FileCheck %s -// RUN: %{build} %device_msan_flags -O2 -g -o %t3.out 
+// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t3.out // RUN: %{run} not %t3.out 2>&1 | FileCheck %s +// RUN: env UR_LAYER_MSAN_OPTIONS=msan_check_host_and_shared_usm:0 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes CHECK-HOSTUSM #include #include -__attribute__((noinline)) int foo(int data1) { return data1; } +__attribute__((noinline)) int check(int data) { return data; } int main() { sycl::queue Q; auto *array = sycl::malloc_host(2, Q); Q.submit([&](sycl::handler &h) { - h.single_task([=]() { array[0] = foo(array[1]); }); + h.single_task([=]() { array[0] = check(array[1]); }); }); Q.wait(); // CHECK-NOT: [kernel] // CHECK: use-of-uninitialized-value // CHECK: kernel <{{.*MyKernel}}> - // CHECK: #0 {{.*}} {{.*check_call.cpp}}:[[@LINE-6]] + // CHECK: #{{.*}} {{.*check_host_usm.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Host USM allocation + // CHECK: #{{.*}} {{.*check_host_usm.cpp}}:[[@LINE-11]] + // CHECK-HOSTUSM-NOT: use-of-uninitialized-value sycl::free(array, Q); return 0; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 63aafac885e63..4940f8f6b79ea 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -140,7 +140,8 @@ ur_result_t MsanShadowMemoryCPU::Destory() { uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) { return MEM_TO_SHADOW(Ptr); } uptr MsanShadowMemoryCPU::MemToOrigin(uptr Ptr) { - return SHADOW_TO_ORIGIN(Ptr); + uptr AlignedPtr = RoundDownTo(Ptr, MSAN_ORIGIN_GRANULARITY); + return SHADOW_TO_ORIGIN(AlignedPtr); } ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( @@ -161,10 +162,23 @@ ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadowWithOrigin( UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void 
*)(size_t)Value); + (void *)(uptr)Value); memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); } + // if (Size) { + // const uptr OriginBegin = MemToOrigin(Ptr); + // const uptr OriginEnd = + // MemToOrigin(Ptr + Size - 1) + MSAN_ORIGIN_GRANULARITY; + // assert(OriginBegin <= OriginEnd); + // UR_LOG_L(getContext()->logger, DEBUG, + // "EnqueuePoisonOrigin(addr={}, count={}, value={})", + // (void *)OriginBegin, OriginEnd - OriginBegin + 1, + // (void *)(uptr)Origin); + // // memset((void *)OriginBegin, Value, OriginEnd - OriginBegin + 1); + // std::fill((uint32_t *)OriginBegin, (uint32_t *)OriginEnd, Origin); + // } + if (OutEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( Queue, NumEvents, EventWaitList, OutEvent)); From 0b201428bda44e91c1bd408e6d2f8fbc1fe6c605 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 18 Jun 2025 15:27:53 +0200 Subject: [PATCH 14/26] fix shared usm --- libdevice/sanitizer/msan_rtl.cpp | 6 +-- .../track-origins/check_host_usm.cpp | 33 +++++++++++++ .../check_host_usm_initialized_on_host.cpp | 30 ++++++++++++ .../track-origins/check_kernel_memcpy.cpp | 45 +++++++++++++++++ .../check_kernel_memmove_no_overlap.cpp | 49 +++++++++++++++++++ .../check_kernel_memmove_overlap.cpp | 41 ++++++++++++++++ .../track-origins/check_shared_usm.cpp | 33 +++++++++++++ .../sanitizer/msan/msan_interceptor.cpp | 6 +-- .../layers/sanitizer/msan/msan_report.cpp | 8 ++- .../layers/sanitizer/msan/msan_report.hpp | 3 +- .../layers/sanitizer/msan/msan_shadow.cpp | 8 +-- .../layers/sanitizer/msan/msan_shadow.hpp | 22 +++++---- .../sanitizer_common/sanitizer_allocator.cpp | 3 +- 13 files changed, 264 insertions(+), 23 deletions(-) create mode 100644 sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm.cpp create mode 100644 sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm_initialized_on_host.cpp create mode 100644 sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memcpy.cpp create mode 100644 
sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_no_overlap.cpp create mode 100644 sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_overlap.cpp create mode 100644 sycl/test-e2e/MemorySanitizer/track-origins/check_shared_usm.cpp diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index a7f5e892bd2dd..6976911f88b98 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -178,7 +178,7 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { } // host/shared USM auto shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; - return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + + return (addr & 0xfff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 3) + shadow_base; } else if (as == ADDRESS_SPACE_LOCAL) { const auto shadow_offset = GetMsanLaunchInfo->LocalShadowOffset; @@ -264,8 +264,8 @@ inline uptr MemToOrigin_PVC(uptr addr, uint32_t as) { } // host/shared USM uptr shadow_base = GetMsanLaunchInfo->GlobalShadowOffset; - return (addr & 0xff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 7) + - shadow_base + 0x0200'0000'0000ULL; + return (addr & 0xfff'ffff'ffffULL) + ((addr & 0x8000'0000'0000ULL) >> 3) + + shadow_base + 0x2000'0000'0000ULL; } // Return clean shadow (0s) by default diff --git a/sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm.cpp b/sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm.cpp new file mode 100644 index 0000000000000..7d032e233beed --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm.cpp @@ -0,0 +1,33 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O0 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O1 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -Xarch_device 
-fsanitize-memory-track-origins=1 -O2 -g -o %t3.out +// RUN: %{run} %t3.out 2>&1 | FileCheck %s +// RUN: env UR_LAYER_MSAN_OPTIONS=msan_check_host_and_shared_usm:0 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes CHECK-HOSTUSM + +#include +#include + +__attribute__((noinline)) int check(int data) { return data; } + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_host(2, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { array[0] = check(array[1]); }); + }); + Q.wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel}}> + // CHECK: #{{.*}} {{.*check_host_usm.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Host USM allocation + // CHECK: #{{.*}} {{.*check_host_usm.cpp}}:[[@LINE-11]] + // CHECK-HOSTUSM-NOT: use-of-uninitialized-value + + sycl::free(array, Q); + return 0; +} diff --git a/sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm_initialized_on_host.cpp b/sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm_initialized_on_host.cpp new file mode 100644 index 0000000000000..55fb7578de043 --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/track-origins/check_host_usm_initialized_on_host.cpp @@ -0,0 +1,30 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t1.out +// RUN: %{run} %t1.out 2>&1 | FileCheck %s +// RUN: env UR_LAYER_MSAN_OPTIONS=msan_check_host_and_shared_usm:0 %{run} %t1.out 2>&1 | FileCheck %s --check-prefixes CHECK-HOSTUSM + +#include +#include + +__attribute__((noinline)) int check(int data) { return data; } + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_host(2, Q); + array[0] = array[1] = 0; + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { array[0] = check(array[1]); }); + }); + Q.wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel}}> + // CHECK: #{{.*}} 
{{.*check_host_usm_initialized_on_host.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Host USM allocation + // CHECK: #{{.*}} {{.*check_host_usm_initialized_on_host.cpp}}:[[@LINE-12]] + // CHECK-HOSTUSM-NOT: use-of-uninitialized-value + + sycl::free(array, Q); + return 0; +} diff --git a/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memcpy.cpp b/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memcpy.cpp new file mode 100644 index 0000000000000..3669242af2e45 --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memcpy.cpp @@ -0,0 +1,45 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t1.out +// RUN: %{run} %t1.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -DINIT_SOURCE -O2 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s --check-prefixes CHECK-INIT + +#include +#include + +__attribute__((noinline)) char check(char data1) { return data1; } + +int main() { + sycl::queue Q; + constexpr size_t N = 1024; + auto *array1 = sycl::malloc_host(N, Q); + auto *array2 = sycl::malloc_host(N, Q); + +#ifdef INIT_SOURCE + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { memset(array1, 0, N); }); + }).wait(); +#endif + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { memset(array2, 0, N); }); + }).wait(); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + memcpy(array2, array1, N); + check(array2[0]); + }); + }).wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel3}}> + // CHECK: #{{.*}} {{.*check_kernel_memcpy.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Host USM allocation + // CHECK: #{{.*}} {{.*check_kernel_memcpy.cpp}}:[[@LINE-24]] + // CHECK-INIT-NOT: use-of-uninitialized-value + + sycl::free(array1, Q); + sycl::free(array2, Q); + return 0; +} diff --git 
a/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_no_overlap.cpp b/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_no_overlap.cpp new file mode 100644 index 0000000000000..87db3461bb1ad --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_no_overlap.cpp @@ -0,0 +1,49 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t1.out +// RUN: %{run} %t1.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -DINIT_SOURCE -O2 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s --check-prefixes CHECK-INIT + +#include +#include + +__attribute__((noinline)) char check(char data1) { return data1; } + +void no_overlap() { + sycl::queue Q; + constexpr size_t N = 1024; + auto *array1 = sycl::malloc_host(N, Q); + auto *array2 = sycl::malloc_host(N, Q); + +#ifdef INIT_SOURCE + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { memset(array1, 0, N); }); + }).wait(); +#endif + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { memset(array2, 0, N); }); + }).wait(); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + memmove(array2, array1, N); + check(array2[0]); + }); + }).wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel3}}> + // CHECK: #{{.*}} {{.*check_kernel_memmove_no_overlap.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Host USM allocation + // CHECK: #{{.*}} {{.*check_kernel_memmove_no_overlap.cpp}}:[[@LINE-24]] + // CHECK-INIT-NOT: use-of-uninitialized-value + + sycl::free(array1, Q); + sycl::free(array2, Q); +} + +int main() { + no_overlap(); + return 0; +} diff --git a/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_overlap.cpp b/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_overlap.cpp new file mode 100644 index 0000000000000..f0a15e52f387a --- 
/dev/null +++ b/sycl/test-e2e/MemorySanitizer/track-origins/check_kernel_memmove_overlap.cpp @@ -0,0 +1,41 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t1.out +// RUN: %{run} %t1.out 2>&1 | FileCheck %s + +#include +#include + +__attribute__((noinline)) char check(char data1) { return data1; } + +void overlap() { + sycl::queue Q; + constexpr size_t N = 1024; + auto *array = sycl::malloc_shared(N, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { memset(array, 0, N / 2); }); + }).wait(); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + check(array[0]); + check(array[1]); + memmove(array, array + N / 2 - 1, N / 2); + check(array[0]); + check(array[1]); + }); + }).wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel2}}> + // CHECK: #{{.*}} {{.*check_kernel_memmove_overlap.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Shared USM allocation + // CHECK: #{{.*}} {{.*check_kernel_memmove_overlap.cpp}}:[[@LINE-20]] + + sycl::free(array, Q); +} + +int main() { + overlap(); + return 0; +} diff --git a/sycl/test-e2e/MemorySanitizer/track-origins/check_shared_usm.cpp b/sycl/test-e2e/MemorySanitizer/track-origins/check_shared_usm.cpp new file mode 100644 index 0000000000000..4f5a79772c1e8 --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/track-origins/check_shared_usm.cpp @@ -0,0 +1,33 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O0 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O1 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t3.out +// RUN: %{run} %t3.out 2>&1 | FileCheck %s +// RUN: env 
UR_LAYER_MSAN_OPTIONS=msan_check_host_and_shared_usm:0 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes CHECK-SHAREDUSM + +#include +#include + +__attribute__((noinline)) int check(int data) { return data; } + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_shared(2, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { array[0] = check(array[1]); }); + }); + Q.wait(); + // CHECK-NOT: [kernel] + // CHECK: use-of-uninitialized-value + // CHECK: kernel <{{.*MyKernel}}> + // CHECK: #{{.*}} {{.*check_shared_usm.cpp}}:[[@LINE-6]] + // CHECK: ORIGIN: Shared USM allocation + // CHECK: #{{.*}} {{.*check_shared_usm.cpp}}:[[@LINE-11]] + // CHECK-SHAREDUSM-NOT: use-of-uninitialized-value + + sycl::free(array, Q); + return 0; +} diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index c80023dec526e..a7edd9394ea7c 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -166,9 +166,9 @@ ur_result_t MsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, return Result; } - ReportUsesUninitializedValue(LaunchInfo.Data.Host.Report, Kernel); - - exitWithErrors(); + if (ReportUsesUninitializedValue(LaunchInfo.Data.Host.Report, Kernel)) { + exitWithErrors(); + } } return Result; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp index a61be3829973f..31c1cf9fe9269 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.cpp @@ -22,7 +22,7 @@ namespace ur_sanitizer_layer { namespace msan { -void ReportUsesUninitializedValue(const MsanErrorReport &Report, +bool ReportUsesUninitializedValue(const MsanErrorReport &Report, ur_kernel_handle_t Kernel) { 
const char *File = Report.File[0] ? Report.File : ""; const char *Func = Report.Func[0] ? Report.Func : ""; @@ -43,7 +43,7 @@ void ReportUsesUninitializedValue(const MsanErrorReport &Report, Report.Line); if (!Report.Origin) { - return; + return true; } Origin Origin = Origin::FromRawId(Report.Origin); @@ -53,7 +53,11 @@ void ReportUsesUninitializedValue(const MsanErrorReport &Report, UR_LOG_L(getContext()->logger, QUIET, "ORIGIN: {} allocation ({})", ToString(Type), (void *)(uptr)Report.Origin); Stack.print(); + + return !(Type == HeapType::HostUSM || Type == HeapType::SharedUSM); } + + return true; } } // namespace msan diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.hpp index 59de5eeab56bb..2097802e6678e 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_report.hpp @@ -21,7 +21,8 @@ struct MsanErrorReport; namespace msan { -void ReportUsesUninitializedValue(const MsanErrorReport &Report, +// Abort the program if the return value is true +bool ReportUsesUninitializedValue(const MsanErrorReport &Report, ur_kernel_handle_t Kernel); } // namespace msan diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 4940f8f6b79ea..6d57e634e4e52 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -455,7 +455,7 @@ uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) { return Ptr - 0x5000'0000'0000ULL; } // host/shared USM - return (Ptr & 0xff'ffff'ffffULL) + ((Ptr & 0x8000'0000'0000ULL) >> 7) + + return (Ptr & 0xfff'ffff'ffffULL) + ((Ptr & 0x8000'0000'0000ULL) >> 3) + ShadowBegin; } @@ -465,9 +465,9 @@ uptr MsanShadowMemoryPVC::MemToOrigin(uptr Ptr) { return AlignedPtr - 
0xA000'0000'0000ULL; } // host/shared USM - return (AlignedPtr & 0xff'ffff'ffffULL) + - ((AlignedPtr & 0x8000'0000'0000ULL) >> 7) + ShadowBegin + - 0x0200'0000'0000ULL; + return (AlignedPtr & 0xfff'ffff'ffffULL) + + ((AlignedPtr & 0x8000'0000'0000ULL) >> 3) + ShadowBegin + + 0x2000'0000'0000ULL; } uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp index feff5df9747e0..8237e535b44a9 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -175,21 +175,25 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { /// Shadow Memory layout of GPU PVC device /// /// USM Allocation Range (56 bits) -/// Host USM : 0x00ff_ff00_0000_0000 ~ 0x00ff_ffff_ffff_ffff -/// Shared USM : 0x0000_7f00_0000_0000 ~ 0x0000_7fff_ffff_ffff +/// Host USM : 0x00ff_f000_0000_0000 ~ 0x00ff_ffff_ffff_ffff +/// Shared USM : 0x0000_7000_0000_0000 ~ 0x0000_7fff_ffff_ffff /// DeviceĀ USM : 0xff00_0000_0000_0000 ~ 0xff00_ffff_ffff_ffff /// /// Shadow Memory Mapping /// 0xff00_0000_0000_0000 - MSAN_SHADOW_BASE : "invalid" -/// MSAN_SHADOW_BASE - MSAN_SHADOW_END1 : "shadow-1" (MSAN_SHADOW_END1 - MSAN_SHADOW_BASE = 0x0200_0000_0000) -/// MSAN_SHADOW_END1 - MSAN_SHADOW_END2 : "origin-1" (MSAN_SHADOW_END1 - MSAN_SHADOW_END2 = 0x0200_0000_0000) -/// (gap) +/// MSAN_SHADOW_BASE - MSAN_SHADOW_END1 : "shadow-1" (MSAN_SHADOW_END1 - MSAN_SHADOW_BASE = 0x2000_0000_0000) +/// MSAN_SHADOW_END1 - MSAN_SHADOW_END2 : "origin-1" (MSAN_SHADOW_END1 - MSAN_SHADOW_END2 = 0x2000_0000_0000) +/// (gap) : (0x1000_0000_0000) /// MSAN_SHADOW_END3 - MSAN_SHADOW_END4 : "origin-2" (MSAN_SHADOW_END4 - MSAN_SHADOW_END3 = 0x5000_0000_0000) /// MSAN_SHADOW_END4 - MSAN_SHADOW_END5 : "shadow-2" (MSAN_SHADOW_END5 - MSAN_SHADOW_END4 = 0x5000_0000_0000) -/// MSAN_SHADOW_END5 - 
0xff00_ffff_ffff_ffff : "app" (MSAN_SHADOW_END5 - MSAN_SHADOW_BASE = 0xB400_0000_0000) +/// MSAN_SHADOW_END5 - 0xff00_ffff_ffff_ffff : "app" (MSAN_SHADOW_END5 - MSAN_SHADOW_BASE = 0xF000_0000_0000) /// -/// here, "shadow-1" and "origin-1" is use for host/shared USM, "shadow-2" and "origin-2" is used for device USM, "app" is device USM. -/// the size of "app" is less than 0x5000_0000_0000_0000. We assume "invalid" is not usable for user application. +/// here, +/// - We assume "invalid" is not usable for user application (by observation) +/// - "shadow-1" and "origin-1" is use for host/shared USM +/// - "shadow-2" and "origin-2" is used for device USM +/// - "app" is device USM, the size of "app" is less than 0x5000_0000_0000_0000, so that it can be fully mapped to its shadow +/// - "gap" is necessary, so that "app" can be mapped to its shadow // clang-format on struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU { MsanShadowMemoryPVC(ur_context_handle_t Context, ur_device_handle_t Device) @@ -200,7 +204,7 @@ struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU { uptr MemToShadow(uptr Ptr) override; uptr MemToOrigin(uptr Ptr) override; - size_t GetShadowSize() override { return 0xb400'0000'0000ULL; } + size_t GetShadowSize() override { return 0xf000'0000'0000ULL; } uptr GetStartAddress() override { return 0x100'0000'0000'0000ULL; } }; diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp index c476f91960323..7ff0c253a3cea 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp @@ -33,7 +33,8 @@ void validateDeviceUSM(uptr Allocated, DeviceType DeviceType) { void validateSharedUSM(uptr Allocated, DeviceType DeviceType) { switch (DeviceType) { case DeviceType::GPU_PVC: { - 
assert((Allocated >> 40) == 0x7f); + std::cout << (void *)Allocated << std::endl; + assert((Allocated >> 44) == 0x7); break; default: break; From 063812ed957387fad56059759295482e2fa6ab68 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 18 Jun 2025 15:28:10 +0200 Subject: [PATCH 15/26] wip --- .../origin-tracking/check_host_usm.cpp | 33 ------------------- 1 file changed, 33 deletions(-) delete mode 100644 sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp diff --git a/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp b/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp deleted file mode 100644 index b577fb0aad246..0000000000000 --- a/sycl/test-e2e/MemorySanitizer/origin-tracking/check_host_usm.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// REQUIRES: linux, cpu || (gpu && level_zero) -// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O0 -g -o %t2.out -// RUN: %{run} not %t2.out 2>&1 | FileCheck %s -// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O1 -g -o %t2.out -// RUN: %{run} not %t2.out 2>&1 | FileCheck %s -// RUN: %{build} %device_msan_flags -Xarch_device -fsanitize-memory-track-origins=1 -O2 -g -o %t3.out -// RUN: %{run} not %t3.out 2>&1 | FileCheck %s -// RUN: env UR_LAYER_MSAN_OPTIONS=msan_check_host_and_shared_usm:0 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes CHECK-HOSTUSM - -#include -#include - -__attribute__((noinline)) int check(int data) { return data; } - -int main() { - sycl::queue Q; - auto *array = sycl::malloc_host(2, Q); - - Q.submit([&](sycl::handler &h) { - h.single_task([=]() { array[0] = check(array[1]); }); - }); - Q.wait(); - // CHECK-NOT: [kernel] - // CHECK: use-of-uninitialized-value - // CHECK: kernel <{{.*MyKernel}}> - // CHECK: #{{.*}} {{.*check_host_usm.cpp}}:[[@LINE-6]] - // CHECK: ORIGIN: Host USM allocation - // CHECK: #{{.*}} {{.*check_host_usm.cpp}}:[[@LINE-11]] - // CHECK-HOSTUSM-NOT: 
use-of-uninitialized-value - - sycl::free(array, Q); - return 0; -} From ab57f6e4e6f4c24513103ce1aba65db160d99597 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 18 Jun 2025 15:31:46 +0200 Subject: [PATCH 16/26] disable cpu --- sycl/test-e2e/MemorySanitizer/lit.local.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test-e2e/MemorySanitizer/lit.local.cfg b/sycl/test-e2e/MemorySanitizer/lit.local.cfg index 291cb45169bf4..dbabae15786c7 100644 --- a/sycl/test-e2e/MemorySanitizer/lit.local.cfg +++ b/sycl/test-e2e/MemorySanitizer/lit.local.cfg @@ -17,6 +17,9 @@ config.unsupported_features += ['spirv-backend'] # https://github.com/intel/llvm/issues/16920 config.unsupported_features += ['arch-intel_gpu_bmg_g21'] +# TRACKER: CMPLRLLVM-68547 +config.unsupported_features += ['cpu'] + config.substitutions.append( ("%device_msan_flags", "-Xarch_device -fsanitize=memory") ) From 26124c72e69b840ff1538a131439d0a88fe049c0 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 03:42:36 +0200 Subject: [PATCH 17/26] fix usm copy --- unified-runtime/source/loader/CMakeLists.txt | 1 - .../loader/layers/sanitizer/msan/msan_ddi.cpp | 162 +++++++++++------- .../sanitizer/msan/msan_interceptor.cpp | 2 +- .../layers/sanitizer/msan/msan_origin.cpp | 19 -- .../sanitizer_common/sanitizer_stackdepot.cpp | 1 - .../sanitizer_common/sanitizer_stackdepot.hpp | 1 + .../sanitizer_common/sanitizer_utils.cpp | 9 + .../sanitizer_common/sanitizer_utils.hpp | 1 + 8 files changed, 114 insertions(+), 82 deletions(-) delete mode 100644 unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp diff --git a/unified-runtime/source/loader/CMakeLists.txt b/unified-runtime/source/loader/CMakeLists.txt index 8e1e9aa9d3604..a36ff4f6988a7 100644 --- a/unified-runtime/source/loader/CMakeLists.txt +++ b/unified-runtime/source/loader/CMakeLists.txt @@ -164,7 +164,6 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_libdevice.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_origin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_origin.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.hpp diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index b7d7fd7404b26..b40ce477df1ec 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1481,8 +1481,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill( Events.push_back(Event); { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + ur_context_handle_t hContext = GetContext(hQueue); + ur_device_handle_t hDevice = GetUSMAllocDevice(hContext, pMem); + assert(hDevice); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(hDevice); const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); ur_event_handle_t Event = nullptr; @@ -1537,32 +1539,51 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( phEventWaitList, &Event)); Events.push_back(Event); - { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); - const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); + ur_context_handle_t hContext = GetContext(hQueue); + bool IsSrcUSM = IsUSM(hContext, pSrc); + bool IsDstUSM = IsUSM(hContext, pDst); - ur_event_handle_t Event = nullptr; - UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, (void *)SrcShadow, - size, 0, nullptr, &Event)); - Events.push_back(Event); - } + if (IsSrcUSM && IsDstUSM) { + 
ur_device_handle_t SrcDevice = GetUSMAllocDevice(hContext, pSrc); + ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + assert(SrcDevice && DstDevice); + const auto SrcDI = getMsanInterceptor()->getDeviceInfo(SrcDevice); + const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); + { + const auto SrcShadow = SrcDI->Shadow->MemToShadow((uptr)pSrc); + const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); - { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto SrcOriginBegin = DeviceInfo->Shadow->MemToOrigin((uptr)pSrc); - const auto SrcOriginEnd = - DeviceInfo->Shadow->MemToOrigin((uptr)pSrc + size - 1) + - MSAN_ORIGIN_GRANULARITY; - const auto DstOrigin = DeviceInfo->Shadow->MemToOrigin((uptr)pDst); + ur_event_handle_t Event = nullptr; + UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, + (void *)SrcShadow, size, 0, nullptr, &Event)); + Events.push_back(Event); + } + { + const auto SrcOriginBegin = SrcDI->Shadow->MemToOrigin((uptr)pSrc); + const auto SrcOriginEnd = + SrcDI->Shadow->MemToOrigin((uptr)pSrc + size - 1) + + MSAN_ORIGIN_GRANULARITY; + const auto DstOrigin = DstDI->Shadow->MemToOrigin((uptr)pDst); - ur_event_handle_t Event = nullptr; - UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstOrigin, - (void *)SrcOriginBegin, SrcOriginEnd - SrcOriginBegin, - 0, nullptr, &Event)); - Events.push_back(Event); + ur_event_handle_t Event = nullptr; + UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstOrigin, + (void *)SrcOriginBegin, + SrcOriginEnd - SrcOriginBegin, 0, nullptr, &Event)); + Events.push_back(Event); + } + } else if (IsDstUSM) { + // FIXME: Assume host memory is always initialized memory, but the better + // way may enable host-side Msan as well + ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + assert(DstDevice); + const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); + { + const auto DstShadow = 
DstDI->Shadow->MemToShadow((uptr)pDst); + ur_event_handle_t Event = nullptr; + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, (char)0, size, 0, + nullptr, &Event)); + Events.push_back(Event); + } } if (phEvent) { @@ -1616,8 +1637,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( Events.push_back(Event); { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + ur_context_handle_t hContext = GetContext(hQueue); + ur_device_handle_t hDevice = GetUSMAllocDevice(hContext, pMem); + assert(hDevice); + const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(hDevice); const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); const char Pattern = 0; @@ -1681,45 +1704,64 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( &Event)); Events.push_back(Event); - { - ur_device_handle_t Device = GetDevice(hQueue); - const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const auto SrcShadow = DeviceInfo->Shadow->MemToShadow((uptr)pSrc); - const auto DstShadow = DeviceInfo->Shadow->MemToShadow((uptr)pDst); + ur_context_handle_t hContext = GetContext(hQueue); + bool IsSrcUSM = IsUSM(hContext, pSrc); + bool IsDstUSM = IsUSM(hContext, pDst); - ur_event_handle_t Event = nullptr; - UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, - (void *)SrcShadow, srcPitch, width, height, 0, - nullptr, &Event)); - Events.push_back(Event); - } + if (IsSrcUSM && IsDstUSM) { + ur_device_handle_t SrcDevice = GetUSMAllocDevice(hContext, pSrc); + ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + assert(SrcDevice && DstDevice); + const auto SrcDI = getMsanInterceptor()->getDeviceInfo(SrcDevice); + const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); + { + const auto SrcShadow = SrcDI->Shadow->MemToShadow((uptr)pSrc); + const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); - { - ur_device_handle_t Device = GetDevice(hQueue); - const auto 
&DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + ur_event_handle_t Event = nullptr; + UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, + (void *)SrcShadow, srcPitch, width, height, 0, + nullptr, &Event)); + Events.push_back(Event); + } - auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; + { + auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; - std::vector WaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - WaitEvents[i] = phEventWaitList[i]; - } + std::vector WaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + WaitEvents[i] = phEventWaitList[i]; + } - for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { - ur_event_handle_t Event = nullptr; - const auto DstOrigin = - DeviceInfo->Shadow->MemToOrigin((uptr)pDst + dstPitch * HeightIndex); - const auto SrcOrigin = - DeviceInfo->Shadow->MemToOrigin((uptr)pSrc + srcPitch * HeightIndex); - const auto SrcOriginEnd = - DeviceInfo->Shadow->MemToOrigin((uptr)pSrc + srcPitch * HeightIndex + - width - 1) + - MSAN_ORIGIN_GRANULARITY; - pfnUSMMemcpy(hQueue, false, (void *)DstOrigin, (void *)SrcOrigin, - SrcOriginEnd - SrcOrigin, WaitEvents.size(), - WaitEvents.data(), &Event); - Events.push_back(Event); + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + ur_event_handle_t Event = nullptr; + const auto DstOrigin = + DstDI->Shadow->MemToOrigin((uptr)pDst + dstPitch * HeightIndex); + const auto SrcOrigin = + SrcDI->Shadow->MemToOrigin((uptr)pSrc + srcPitch * HeightIndex); + const auto SrcOriginEnd = + SrcDI->Shadow->MemToOrigin((uptr)pSrc + srcPitch * HeightIndex + + width - 1) + + MSAN_ORIGIN_GRANULARITY; + pfnUSMMemcpy(hQueue, false, (void *)DstOrigin, (void *)SrcOrigin, + SrcOriginEnd - SrcOrigin, WaitEvents.size(), + WaitEvents.data(), &Event); + Events.push_back(Event); + } } + } else if (IsDstUSM) { + // FIXME: Assume host memory is always initialized 
memory, but the better + // way may enable host-side Msan as well + ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + assert(DstDevice); + const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); + const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); + const char Pattern = 0; + ur_event_handle_t Event = nullptr; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( + hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, + nullptr, &Event)); + Events.push_back(Event); } if (phEvent) { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index a7edd9394ea7c..c339e1cd5dd5a 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -52,7 +52,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, auto ContextInfo = getContextInfo(Context); std::shared_ptr DI = Device ? getDeviceInfo(Device) : nullptr; - uint32_t Alignment = Properties ? Properties->align : 4; + uint32_t Alignment = Properties ? Properties->align : MSAN_ORIGIN_GRANULARITY; // Alignment must be zero or a power-of-two if (0 != (Alignment & (Alignment - 1))) { return UR_RESULT_ERROR_INVALID_ARGUMENT; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp deleted file mode 100644 index ef0a0dfd1fff5..0000000000000 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.cpp +++ /dev/null @@ -1,19 +0,0 @@ -/* - * - * Copyright (C) 2025 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file msan_origin.cpp - * - */ - -#include "msan_origin.hpp" -#include "ur_sanitizer_layer.hpp" - -namespace ur_sanitizer_layer { -namespace msan {} // namespace msan -} // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp index 361b08462e579..8637a5034b517 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.cpp @@ -13,7 +13,6 @@ #include "sanitizer_stackdepot.hpp" -#include #include namespace ur_sanitizer_layer { diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp index 7c35a26575ab2..c8d2a522ab13c 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stackdepot.hpp @@ -35,6 +35,7 @@ inline const char *ToString(HeapType Type) { } } +// Save/load stack with corresponding stack id void StackDepotPut(uint32_t Id, StackTrace &Stack); StackTrace StackDepotGet(uint32_t Id); diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index e8740966c3648..ce21b5496616f 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -123,6 +123,15 @@ std::string GetKernelName(ur_kernel_handle_t Kernel) { return std::string(KernelNameBuf.data(), KernelNameSize - 1); } 
+bool IsUSM(ur_context_handle_t Context, const void *MemPtr) { + ur_usm_type_t USMType = UR_USM_TYPE_UNKNOWN; + auto Result = getContext()->urDdiTable.USM.pfnGetMemAllocInfo( + Context, MemPtr, UR_USM_ALLOC_INFO_TYPE, sizeof(USMType), &USMType, + nullptr); + assert(Result == UR_RESULT_SUCCESS); + return USMType != UR_USM_TYPE_UNKNOWN; +} + ur_device_handle_t GetUSMAllocDevice(ur_context_handle_t Context, const void *MemPtr) { ur_device_handle_t Device{}; diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 5ff0434e80847..9291d1bcc3dac 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -50,6 +50,7 @@ bool GetDeviceUSMCapability(ur_device_handle_t Device, std::string GetKernelName(ur_kernel_handle_t Kernel); size_t GetDeviceLocalMemorySize(ur_device_handle_t Device); ur_program_handle_t GetProgram(ur_kernel_handle_t Kernel); +bool IsUSM(ur_context_handle_t Context, const void *MemPtr); ur_device_handle_t GetUSMAllocDevice(ur_context_handle_t Context, const void *MemPtr); uint32_t GetKernelNumArgs(ur_kernel_handle_t Kernel); From cca3f5d27ef6b132436a2b683dfb2fb599ea3d7d Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 03:44:00 +0200 Subject: [PATCH 18/26] fix usm copy --- libdevice/sanitizer/msan_rtl.cpp | 40 +++++++++++++------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 6976911f88b98..3b0080ec42e51 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -84,11 +84,9 @@ inline void ConvertGenericPointer(uptr &addr, uint32_t &as) { MSAN_DEBUG(__spirv_ocl_printf(__msan_print_generic_to, old, addr, as)); } -void __msan_internal_report_save(const 
uint32_t size, - const char __SYCL_CONSTANT__ *file, - const uint32_t line, - const char __SYCL_CONSTANT__ *func, - const uint32_t origin) { +void SaveReport(const uint32_t size, const char __SYCL_CONSTANT__ *file, + const uint32_t line, const char __SYCL_CONSTANT__ *func, + const uint32_t origin = 0) { const int Expected = MSAN_REPORT_NONE; int Desired = MSAN_REPORT_START; @@ -138,14 +136,6 @@ void __msan_internal_report_save(const uint32_t size, } } -void __msan_report_error(const uint32_t size, - const char __SYCL_CONSTANT__ *file, - const uint32_t line, - const char __SYCL_CONSTANT__ *func, - uint32_t origin = 0) { - __msan_internal_report_save(size, file, line, func, origin); -} - inline uptr MemToShadow_CPU(uptr addr) { return addr ^ 0x500000000000ULL; } inline uptr MemToShadow_DG2(uptr addr, uint32_t as) { @@ -301,11 +291,18 @@ inline uptr MemToOrigin(uptr addr, uint32_t as) { return origin_ptr; } -inline void __msan_exit() { +inline void Exit() { if (!GetMsanLaunchInfo->IsRecover) __devicelib_exit(); } +inline void ReportError(const uint32_t size, const char __SYCL_CONSTANT__ *file, + const uint32_t line, const char __SYCL_CONSTANT__ *func, + const uint32_t origin = 0) { + SaveReport(size, file, line, func, origin); + Exit(); +} + // This function is only used for shadow propagation template void GroupAsyncCopy(uptr Dest, uptr Src, size_t NumElements, size_t Stride) { @@ -373,8 +370,7 @@ inline void UnpoisonShadow(uptr addr, uint32_t as, size_t size) { if (!GetMsanLaunchInfo) \ return; \ if (UNLIKELY(s)) { \ - __msan_report_error(size, file, line, func, o); \ - __msan_exit(); \ + ReportError(size, file, line, func, o); \ } \ } @@ -388,8 +384,7 @@ __msan_warning(const char __SYCL_CONSTANT__ *file, uint32_t line, const char __SYCL_CONSTANT__ *func) { if (!GetMsanLaunchInfo) return; - __msan_report_error(1, file, line, func); - __msan_exit(); + ReportError(1, file, line, func); } DEVICE_EXTERN_C_NOINLINE void @@ -397,8 +392,7 @@ __msan_warning_noreturn(const 
char __SYCL_CONSTANT__ *file, uint32_t line, const char __SYCL_CONSTANT__ *func) { if (!GetMsanLaunchInfo) return; - __msan_internal_report_save(1, file, line, func, 0); - __msan_exit(); + ReportError(1, file, line, func, 0); } DEVICE_EXTERN_C_NOINLINE void @@ -406,8 +400,7 @@ __msan_warning_with_origin(uint32_t origin, const char __SYCL_CONSTANT__ *file, uint32_t line, const char __SYCL_CONSTANT__ *func) { if (!GetMsanLaunchInfo) return; - __msan_internal_report_save(1, file, line, func, origin); - __msan_exit(); + ReportError(1, file, line, func, origin); } DEVICE_EXTERN_C_NOINLINE void __msan_warning_with_origin_noreturn( @@ -415,8 +408,7 @@ DEVICE_EXTERN_C_NOINLINE void __msan_warning_with_origin_noreturn( const char __SYCL_CONSTANT__ *func) { if (!GetMsanLaunchInfo) return; - __msan_internal_report_save(1, file, line, func, origin); - __msan_exit(); + ReportError(1, file, line, func, origin); } // For mapping detail, ref to From af499f0fb65df6cffa8c8019bf375970920a93f3 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 05:22:50 +0200 Subject: [PATCH 19/26] fix cpu --- libdevice/sanitizer/msan_rtl.cpp | 28 +++++++---- sycl/test-e2e/MemorySanitizer/lit.local.cfg | 3 -- .../layers/sanitizer/msan/msan_shadow.cpp | 47 ++++++++++--------- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 3b0080ec42e51..89a6a322ad913 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -36,7 +36,7 @@ const __SYCL_CONSTANT__ char __msan_print_shadow[] = "[kernel] __msan_get_shadow(addr=%p, as=%d) = %p: %02X\n"; const __SYCL_CONSTANT__ char __msan_print_origin[] = - "[kernel] __msan_get_origin(addr=%p, as=%d) = %p: %02X\n"; + "[kernel] __msan_get_origin(addr=%p, as=%d) = %p: %04x\n"; const __SYCL_CONSTANT__ char __msan_print_unsupport_device_type[] = "[kernel] Unsupport device type: %d\n"; @@ -136,6 +136,7 @@ void SaveReport(const uint32_t size, 
const char __SYCL_CONSTANT__ *file, } } +// The CPU mapping is based on compiler-rt/msan inline uptr MemToShadow_CPU(uptr addr) { return addr ^ 0x500000000000ULL; } inline uptr MemToShadow_DG2(uptr addr, uint32_t as) { @@ -236,7 +237,10 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { return shadow_ptr; } -inline uptr MemToOrigin_CPU(uptr addr) { return addr ^ 0x100000000000ULL; } +// The CPU mapping is based on compiler-rt/msan +inline uptr MemToOrigin_CPU(uptr addr) { + return MemToShadow_CPU(addr) + 0x100000000000ULL; +} inline uptr MemToOrigin_DG2(uptr addr, uint32_t as) { return GetMsanLaunchInfo->CleanShadow; @@ -286,7 +290,7 @@ inline uptr MemToOrigin(uptr addr, uint32_t as) { #endif MSAN_DEBUG(__spirv_ocl_printf(__msan_print_origin, (void *)addr, as, - (void *)origin_ptr, 0)); + (void *)origin_ptr, *(uint32_t *)origin_ptr)); return origin_ptr; } @@ -313,8 +317,9 @@ void GroupAsyncCopy(uptr Dest, uptr Src, size_t NumElements, size_t Stride) { } } -static __SYCL_CONSTANT__ const char __msan_print_memcpy[] = - "[kernel] memcpy(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; +static __SYCL_CONSTANT__ const char __msan_print_copy_shadow[] = + "[kernel] CopyShadow(dst=%p(%d), src=%p(%d), shadow_dst=%p, shadow_src=%p, " + "size=%p)\n"; // FIXME: The original implemention only copies the origin of poisoned memories void CopyOrigin(uptr dst, uint32_t dst_as, uptr src, uint32_t src_as, @@ -332,10 +337,14 @@ inline void CopyShadowAndOrigin(uptr dst, uint32_t dst_as, uptr src, auto *shadow_src = (__SYCL_GLOBAL__ char *)MemToShadow(src, src_as); Memcpy(shadow_dst, shadow_src, size); CopyOrigin(dst, dst_as, src, src_as, size); + + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_copy_shadow, dst, dst_as, src, + src_as, shadow_dst, shadow_src)); } -static __SYCL_CONSTANT__ const char __msan_print_memmove[] = - "[kernel] memmove(dst=%p, src=%p, shadow_dst=%p, shadow_src=%p, size=%p)\n"; +static __SYCL_CONSTANT__ const char __msan_print_move_shadow[] = + "[kernel] 
MoveShadow(dst=%p(%d), src=%p(%d), shadow_dst=%p, shadow_src=%p, " + "size=%p)\n"; // FIXME: The original implemention only moves the origin of poisoned memories void MoveOrigin(uptr dst, uint32_t dst_as, uptr src, uint32_t src_as, @@ -354,6 +363,9 @@ inline void MoveShadowAndOrigin(uptr dst, uint32_t dst_as, uptr src, // MoveOrigin transfers origins by refering to their shadows MoveOrigin(dst, dst_as, src, src_as, size); Memmove(shadow_dst, shadow_src, size); + + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_move_shadow, dst, dst_as, src, + src_as, shadow_dst, shadow_src)); } inline void UnpoisonShadow(uptr addr, uint32_t as, size_t size) { @@ -433,7 +445,7 @@ DEVICE_EXTERN_C_NOINLINE __SYCL_GLOBAL__ void *__msan_get_origin(uptr addr, DEVICE_EXTERN_C_NOINLINE void __msan_maybe_store_origin_##size( \ type s, uptr addr, uint32_t as, uint32_t o) { \ if (UNLIKELY(s)) { \ - *(__SYCL_GLOBAL__ u32 *)__msan_get_origin(addr, as) = o; \ + *(__SYCL_GLOBAL__ u32 *)MemToOrigin(addr, as) = o; \ } \ } diff --git a/sycl/test-e2e/MemorySanitizer/lit.local.cfg b/sycl/test-e2e/MemorySanitizer/lit.local.cfg index dbabae15786c7..291cb45169bf4 100644 --- a/sycl/test-e2e/MemorySanitizer/lit.local.cfg +++ b/sycl/test-e2e/MemorySanitizer/lit.local.cfg @@ -17,9 +17,6 @@ config.unsupported_features += ['spirv-backend'] # https://github.com/intel/llvm/issues/16920 config.unsupported_features += ['arch-intel_gpu_bmg_g21'] -# TRACKER: CMPLRLLVM-68547 -config.unsupported_features += ['cpu'] - config.substitutions.append( ("%device_msan_flags", "-Xarch_device -fsanitize=memory") ) diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 6d57e634e4e52..2bd69710c57d9 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -107,6 +107,8 @@ ur_result_t MsanShadowMemoryCPU::Setup() { } } } + ShadowBegin = 
kMemoryLayout[1].start; + ShadowEnd = kMemoryLayout[9].end; return UR_RESULT_SUCCESS; }(); return Result; @@ -141,7 +143,7 @@ uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) { return MEM_TO_SHADOW(Ptr); } uptr MsanShadowMemoryCPU::MemToOrigin(uptr Ptr) { uptr AlignedPtr = RoundDownTo(Ptr, MSAN_ORIGIN_GRANULARITY); - return SHADOW_TO_ORIGIN(AlignedPtr); + return SHADOW_TO_ORIGIN(MEM_TO_SHADOW(AlignedPtr)); } ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( @@ -156,29 +158,30 @@ ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadowWithOrigin( uint32_t NumEvents, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { if (Size) { - const uptr ShadowBegin = MemToShadow(Ptr); - const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); - assert(ShadowBegin <= ShadowEnd); - UR_LOG_L(getContext()->logger, DEBUG, - "EnqueuePoisonShadow(addr={}, count={}, value={})", - (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(uptr)Value); - memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + { + const uptr ShadowBegin = MemToShadow(Ptr); + const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); + assert(ShadowBegin <= ShadowEnd); + UR_LOG_L(getContext()->logger, DEBUG, + "EnqueuePoisonShadow(addr={}, count={}, value={})", + (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, + (void *)(uptr)Value); + memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + } + { + const uptr OriginBegin = MemToOrigin(Ptr); + const uptr OriginEnd = + MemToOrigin(Ptr + Size - 1) + MSAN_ORIGIN_GRANULARITY; + assert(OriginBegin <= OriginEnd); + UR_LOG_L(getContext()->logger, DEBUG, + "EnqueuePoisonOrigin(addr={}, count={}, value={})", + (void *)OriginBegin, OriginEnd - OriginBegin + 1, + (void *)(uptr)Origin); + // memset((void *)OriginBegin, Value, OriginEnd - OriginBegin + 1); + std::fill((uint32_t *)OriginBegin, (uint32_t *)OriginEnd, Origin); + } } - // if (Size) { - // const uptr OriginBegin = MemToOrigin(Ptr); - // const uptr OriginEnd = - // 
MemToOrigin(Ptr + Size - 1) + MSAN_ORIGIN_GRANULARITY; - // assert(OriginBegin <= OriginEnd); - // UR_LOG_L(getContext()->logger, DEBUG, - // "EnqueuePoisonOrigin(addr={}, count={}, value={})", - // (void *)OriginBegin, OriginEnd - OriginBegin + 1, - // (void *)(uptr)Origin); - // // memset((void *)OriginBegin, Value, OriginEnd - OriginBegin + 1); - // std::fill((uint32_t *)OriginBegin, (uint32_t *)OriginEnd, Origin); - // } - if (OutEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( Queue, NumEvents, EventWaitList, OutEvent)); From 75f2f7804967ffffaf4593d471fcdc1edeaf3a20 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 07:49:14 +0200 Subject: [PATCH 20/26] fix llvm ir test --- .../SPIRV/instrument_global_address_space.ll | 8 ++++---- .../MemorySanitizer/SPIRV/instrument_private_mem.ll | 2 +- .../MemorySanitizer/SPIRV/instrument_static_local_mem.ll | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_global_address_space.ll b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_global_address_space.ll index bb71ec9a436f7..bf3496d6667ba 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_global_address_space.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_global_address_space.ll @@ -13,12 +13,12 @@ define spir_kernel void @MyKernel(ptr addrspace(1) noundef align 4 %_arg_array) entry: %0 = load i32, ptr addrspace(1) %_arg_array, align 4 ; CHECK: %1 = ptrtoint ptr addrspace(1) %_arg_array to i64 - ; CHECK-NEXT: %2 = call ptr addrspace(1) @__msan_get_shadow(i64 %1, i32 1, ptr addrspace(2) null) + ; CHECK-NEXT: %2 = call ptr addrspace(1) @__msan_get_shadow(i64 %1, i32 1) ; CHECK-NEXT: %_msld = load i32, ptr addrspace(1) %2, align 4 - ; CHECK-NEXT: call void @__msan_maybe_warning_4(i32 zeroext %_msld, i64 zeroext 0, ptr addrspace(2) null, i32 0, ptr addrspace(2) @__msan_kernel) + ; CHECK-NEXT: call void 
@__msan_maybe_warning_4(i32 zeroext %_msld, i32 zeroext 0, ptr addrspace(2) null, i32 0, ptr addrspace(2) @__msan_kernel) %call = call spir_func i32 @foo(i32 %0) ; CHECK: %3 = ptrtoint ptr addrspace(1) %_arg_array to i64 - ; CHECK-NEXT: %4 = call ptr addrspace(1) @__msan_get_shadow(i64 %3, i32 1, ptr addrspace(2) null) + ; CHECK-NEXT: %4 = call ptr addrspace(1) @__msan_get_shadow(i64 %3, i32 1) ; CHECK-NEXT: store i32 0, ptr addrspace(1) %4, align 4 store i32 %call, ptr addrspace(1) %_arg_array, align 4 ret void @@ -31,4 +31,4 @@ entry: } ; CHECK: attributes [[ATTR0]] -; CHECK-SAME: "sycl-device-global-size"="32" "sycl-device-image-scope" "sycl-host-access"="0" "sycl-unique-id"="__MsanKernelMetadata3ff767e9a7a43f1f3968062dbb4ee3b4" +; CHECK-SAME: "sycl-device-global-size"="24" "sycl-device-image-scope" "sycl-host-access"="0" "sycl-unique-id"="__MsanKernelMetadata3ff767e9a7a43f1f3968062dbb4ee3b4" diff --git a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_private_mem.ll b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_private_mem.ll index f6e6aeb6f2ad2..2c8aa588d4efa 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_private_mem.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_private_mem.ll @@ -20,7 +20,7 @@ define spir_func void @ByValFunc(ptr noundef byval(%"class.sycl::_V1::range") al ; CHECK-LABEL: define spir_func void @ByValFunc entry: ; CHECK: %0 = ptrtoint ptr %_arg_array12 to i64 - ; CHECK: %1 = call ptr addrspace(1) @__msan_get_shadow(i64 %0, i32 0, ptr addrspace(2) null) + ; CHECK: %1 = call ptr addrspace(1) @__msan_get_shadow(i64 %0, i32 0) ; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 8 %1, i8 0, i64 8, i1 false) %_arg_array12.ascast = addrspacecast ptr %_arg_array12 to ptr addrspace(4) ret void diff --git a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_static_local_mem.ll b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_static_local_mem.ll index 
15cf8a1584399..d6a7c21a79a8f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_static_local_mem.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_static_local_mem.ll @@ -4,7 +4,7 @@ target triple = "spir64-unknown-unknown" @WGCopy = internal addrspace(3) global i64 zeroinitializer, align 8 @WGLocal = internal addrspace(3) global i64 zeroinitializer, align 8 -; CHECK: @__MsanKernelMetadata{{.*}}i64 14, i64 1, i64 0 +; CHECK: @__MsanKernelMetadata{{.*}}i64 14, i64 4 define spir_kernel void @MyKernelMemset() sanitize_memory { ; CHECK-LABEL: @MyKernelMemset From 290be9917cc9a7d953a626de091f193eaa5d89ca Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 08:21:01 +0200 Subject: [PATCH 21/26] add llvm ir test --- clang/lib/Driver/SanitizerArgs.cpp | 2 - .../Instrumentation/MemorySanitizer.cpp | 29 +++++---------- .../MemorySanitizer/SPIRV/track_origins.ll | 37 +++++++++++++++++++ 3 files changed, 46 insertions(+), 22 deletions(-) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/SPIRV/track_origins.ll diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 408fc95ef416a..5e1dedb2d6f30 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -1322,8 +1322,6 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-msan-track-origins=" + Twine(MsanTrackOrigins))); - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-msan-print-stack-names=0"); } } else if (Sanitizers.has(SanitizerKind::Thread)) { CmdArgs.push_back("-fsanitize=thread"); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 0831804d6811e..b1e95deec66dd 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -394,16 +394,6 @@ 
static cl::opt cl::desc("instrument private pointer"), cl::Hidden, cl::init(true)); -// This flag is used to enhance debug for spirv (internal use only) -// - Add function name (not demangled for easily debug) on __msan_get_shadow -// (but not work on GPU) -// - Diable combination of origin and shadow propagation -// - The "origin" parameter of "__msan_maybe_warning_N" is the shadow address -// of UUM -static cl::opt ClSpirOffloadDebug("msan-spir-debug", - cl::desc("enhance debug for spirv"), - cl::Hidden, cl::init(false)); - const char kMsanModuleCtorName[] = "msan.module_ctor"; const char kMsanInitName[] = "__msan_init"; @@ -794,6 +784,8 @@ class MemorySanitizerOnSpirv { Constant *getOrCreateGlobalString(StringRef Name, StringRef Value, unsigned AddressSpace); + operator bool() const { return IsSPIRV; } + private: void initializeCallbacks(); void instrumentGlobalVariables(); @@ -1343,11 +1335,8 @@ void MemorySanitizerPass::printPipeline( static GlobalVariable *createPrivateConstGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - bool SpirOrSpirv = Triple(M.getTargetTriple()).isSPIROrSPIRV(); return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true, - GlobalValue::PrivateLinkage, StrConst, "", nullptr, - llvm::GlobalValue::NotThreadLocal, - SpirOrSpirv ? 
kSpirOffloadConstantAS : 0); + GlobalValue::PrivateLinkage, StrConst, ""); } template @@ -1453,7 +1442,7 @@ void MemorySanitizer::createUserspaceApi(Module &M, IRB.getVoidTy(), IRB.getInt32Ty()); } else { // __msan_warning_with_origin[_noreturn]( - // int origin, + // uint32_t origin, // char* file, // unsigned int line, // char* func @@ -1530,7 +1519,7 @@ void MemorySanitizer::createUserspaceApi(Module &M, } else { // SPIR or SPIR-V // __msan_maybe_warning_N( // intN_t status, - // int origin, + // uint32_t origin, // char* file, // unsigned int line, // char* func @@ -1541,11 +1530,11 @@ void MemorySanitizer::createUserspaceApi(Module &M, IRB.getInt8PtrTy(kSpirOffloadConstantAS), IRB.getInt32Ty(), IRB.getInt8PtrTy(kSpirOffloadConstantAS)); - // __msan_maybe_warning_N( + // __msan_maybe_store_origin_N( // intN_t status, // uptr addr, // uint32_t as, - // int origin, + // uint32_t origin // ) FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( @@ -1743,7 +1732,7 @@ void MemorySanitizer::initializeModule(Module &M) { ColdCallWeights = MDBuilder(*C).createUnlikelyBranchWeights(); OriginStoreWeights = MDBuilder(*C).createUnlikelyBranchWeights(); - if (!CompileKernel) { + if (!CompileKernel && !Spirv) { if (TrackOrigins) M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] { return new GlobalVariable( @@ -2324,7 +2313,7 @@ struct MemorySanitizerVisitor : public InstVisitor { const DataLayout &DL = F.getDataLayout(); // Disable combining in some cases. TrackOrigins checks each shadow to pick // correct origin. 
- bool Combine = !(MS.TrackOrigins || ClSpirOffloadDebug); + bool Combine = !MS.TrackOrigins; Instruction *Instruction = InstructionChecks.front().OrigIns; Value *Shadow = nullptr; for (const auto &ShadowData : InstructionChecks) { diff --git a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/track_origins.ll b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/track_origins.ll new file mode 100644 index 0000000000000..a5b383ba36932 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/track_origins.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -passes=msan -msan-instrumentation-with-call-threshold=0 -msan-eager-checks=1 -msan-spir-privates=0 -msan-track-origins=1 -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +; CHECK: @__MsanKernelMetadata{{.*}}i64 8, i64 36 +; CHECK-SAME: [[ATTR0:#[0-9]+]] +; CHECK-NOT: __msan_track_origins +; CHECK-NOT: _tls + +define spir_kernel void @MyKernel(ptr addrspace(1) noundef align 4 %_arg_array) sanitize_memory { +; CHECK-LABEL: define spir_kernel void @MyKernel +entry: + %0 = load i32, ptr addrspace(1) %_arg_array, align 4 + ; CHECK: %1 = ptrtoint ptr addrspace(1) %_arg_array to i64 + ; CHECK-NEXT: %2 = call ptr addrspace(1) @__msan_get_shadow(i64 %1, i32 1) + ; CHECK-NEXT: %3 = call ptr addrspace(1) @__msan_get_origin(i64 %1, i32 1) + ; CHECK-NEXT: %_msld = load i32, ptr addrspace(1) %2, align 4 + ; CHECK-NEXT: %4 = load i32, ptr addrspace(1) %3, align 4 + ; CHECK-NEXT: call void @__msan_maybe_warning_4(i32 zeroext %_msld, i32 zeroext %4, ptr addrspace(2) null, i32 0, ptr addrspace(2) @__msan_kernel) + %call = call spir_func i32 @foo(i32 %0) + ; CHECK: %5 = ptrtoint ptr addrspace(1) %_arg_array to i64 + ; CHECK-NEXT: %6 = call ptr addrspace(1) @__msan_get_shadow(i64 %5, i32 1) + ; CHECK-NEXT: %7 = call ptr addrspace(1) @__msan_get_origin(i64 %5, i32 1) + ; CHECK-NEXT: store i32 0, ptr 
addrspace(1) %6, align 4 + store i32 %call, ptr addrspace(1) %_arg_array, align 4 + ret void +} + +define spir_func i32 @foo(i32 %data) sanitize_memory { +; CHECK-LABEL: define spir_func i32 @foo +entry: + ret i32 %data +} + +; CHECK: attributes [[ATTR0]] +; CHECK-SAME: "sycl-device-global-size"="24" "sycl-device-image-scope" "sycl-host-access"="0" "sycl-unique-id"="__MsanKernelMetadata3ff767e9a7a43f1f3968062dbb4ee3b4" From 838b53b3e5f725ae09322c7edd6f528dc143ef74 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 08:30:55 +0200 Subject: [PATCH 22/26] fix brakets --- .../sanitizer/sanitizer_common/sanitizer_allocator.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp index 7ff0c253a3cea..29cb99b436cce 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp @@ -21,25 +21,22 @@ namespace ur_sanitizer_layer { namespace { void validateDeviceUSM(uptr Allocated, DeviceType DeviceType) { switch (DeviceType) { - case DeviceType::GPU_PVC: { + case DeviceType::GPU_PVC: assert((Allocated >> 52) == 0xff0); break; default: break; } - } } void validateSharedUSM(uptr Allocated, DeviceType DeviceType) { switch (DeviceType) { - case DeviceType::GPU_PVC: { - std::cout << (void *)Allocated << std::endl; + case DeviceType::GPU_PVC: assert((Allocated >> 44) == 0x7); break; default: break; } - } } } // namespace From 292c7a5f00d208fedd4fab4e6501da241a559771 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 08:59:19 +0200 Subject: [PATCH 23/26] fix build --- .../loader/layers/sanitizer/msan/msan_interceptor.cpp | 3 ++- .../sanitizer/sanitizer_common/sanitizer_allocator.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 
3 deletions(-) diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index c339e1cd5dd5a..6a32120b67b43 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -96,7 +96,8 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, HeapType = HeapType::SharedUSM; break; default: - assert(false && "Unknown heap type"); + UR_LOG_L(getContext()->logger, ERR, "Unknown heap type"); + return UR_RESULT_ERROR_UNKNOWN; } StackTrace Stack = GetCurrentBacktrace(); diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp index 29cb99b436cce..2e6de1eac7f28 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.cpp @@ -19,7 +19,8 @@ namespace ur_sanitizer_layer { namespace { -void validateDeviceUSM(uptr Allocated, DeviceType DeviceType) { +void validateDeviceUSM([[maybe_unused]] uptr Allocated, + [[maybe_unused]] DeviceType DeviceType) { switch (DeviceType) { case DeviceType::GPU_PVC: assert((Allocated >> 52) == 0xff0); @@ -29,7 +30,8 @@ void validateDeviceUSM(uptr Allocated, DeviceType DeviceType) { } } -void validateSharedUSM(uptr Allocated, DeviceType DeviceType) { +void validateSharedUSM([[maybe_unused]] uptr Allocated, + [[maybe_unused]] DeviceType DeviceType) { switch (DeviceType) { case DeviceType::GPU_PVC: assert((Allocated >> 44) == 0x7); From 6adc69869b41ceaa4a7401195deca7376a7e36a7 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 11:59:34 +0200 Subject: [PATCH 24/26] fix buffer + add driver test --- clang/lib/Driver/SanitizerArgs.cpp | 2 + 
clang/test/Driver/sycl-device-sanitizer.cpp | 6 +++ .../layers/sanitizer/msan/msan_buffer.cpp | 48 ++++++------------- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 36 +++++++------- .../loader/layers/sanitizer/msan/msan_ddi.hpp | 43 +++++++++++++++++ .../sanitizer_common/sanitizer_utils.cpp | 35 +++++++++++--- .../sanitizer_common/sanitizer_utils.hpp | 5 ++ 7 files changed, 116 insertions(+), 59 deletions(-) diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 5e1dedb2d6f30..d243d4b6ace1f 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -1319,6 +1319,8 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, CmdArgs.push_back("-msan-poison-stack-with-call=1"); if (MsanTrackOrigins) { + assert(MsanTrackOrigins == 1 && + "Only support -fsanitize-memory-track-origins=1"); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-msan-track-origins=" + Twine(MsanTrackOrigins))); diff --git a/clang/test/Driver/sycl-device-sanitizer.cpp b/clang/test/Driver/sycl-device-sanitizer.cpp index 7045469cae652..91054319bdb81 100644 --- a/clang/test/Driver/sycl-device-sanitizer.cpp +++ b/clang/test/Driver/sycl-device-sanitizer.cpp @@ -48,6 +48,12 @@ // SYCL-MSAN-SAME: "-mllvm" "-msan-instrumentation-with-call-threshold=0" // SYCL-MSAN-SAME: "-mllvm" "-msan-eager-checks=1" +// RUN: %clangxx -fsycl -fsanitize=memory -fsanitize-memory-track-origins=1 -c %s -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SYCL-MSAN %s +// SYCL-MSAN: clang{{.*}} "-fsycl-is-device" +// SYCL-MSAN-SAME: -fsanitize=memory +// SYCL-MSAN-SAME: "-mllvm" "-msan-track-origins=1" + // RUN: %clangxx -fsycl -Xarch_device -fsanitize=memory -c %s -### 2>&1 \ // RUN: | FileCheck --check-prefix=SYCL-MSAN-XARCH-DEVICE %s // SYCL-MSAN-XARCH-DEVICE: clang{{.*}} "-fsycl-is-device" diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp 
b/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp index 6d8e9f95a38f5..985cd4a884ab5 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp @@ -12,6 +12,7 @@ */ #include "msan_buffer.hpp" +#include "msan_ddi.hpp" #include "msan_interceptor.hpp" #include "sanitizer_common/sanitizer_utils.hpp" #include "ur_sanitizer_layer.hpp" @@ -58,25 +59,11 @@ ur_result_t EnqueueMemCopyRectHelper( // loop call 2D memory copy function to implement it. for (size_t i = 0; i < Region.depth; i++) { ur_event_handle_t NewEvent{}; - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D( + UR_CALL(msan::urEnqueueUSMMemcpy2D( Queue, false, DstOrigin + (i * DstSlicePitch), DstRowPitch, SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width, Region.height, NumEventsInWaitList, EventWaitList, &NewEvent)); Events.push_back(NewEvent); - - // Update shadow memory - { - NewEvent = nullptr; - uptr DstShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)DstOrigin + - (i * DstSlicePitch)); - uptr SrcShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)SrcOrigin + - (i * SrcSlicePitch)); - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D( - Queue, false, (void *)DstShadowAddr, DstRowPitch, - (void *)SrcShadowAddr, SrcRowPitch, Region.width, Region.height, - NumEventsInWaitList, EventWaitList, &NewEvent)); - Events.push_back(NewEvent); - } } if (Blocking) { @@ -118,9 +105,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - URes = getMsanInterceptor()->allocateMemory(Context, Device, &USMDesc, Pool, - Size, AllocType::DEVICE_USM, - ur_cast(&Allocation)); + URes = msan::urUSMDeviceAlloc(Context, Device, &USMDesc, Pool, Size, + ur_cast(&Allocation)); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to allocate {} bytes memory for 
buffer {}", Size, this); @@ -129,8 +115,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { if (HostPtr) { ManagedQueue Queue(Context, Device); - URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr); + URes = msan::urEnqueueUSMMemcpy(Queue, true, Allocation, HostPtr, Size, 0, + nullptr, nullptr); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L( getContext()->logger, ERR, @@ -138,12 +124,6 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { Size, (void *)HostPtr, this); return URes; } - - // Update shadow memory - std::shared_ptr DeviceInfo = - getMsanInterceptor()->getDeviceInfo(Device); - UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, (uptr)Allocation, - Size, 0)); } } @@ -162,8 +142,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - URes = getContext()->urDdiTable.USM.pfnHostAlloc( - Context, &USMDesc, Pool, Size, ur_cast(&HostAllocation)); + URes = msan::urUSMHostAlloc(Context, &USMDesc, Pool, Size, + ur_cast(&HostAllocation)); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to allocate {} bytes host " @@ -176,9 +156,9 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { // Copy data from last synced device to host { ManagedQueue Queue(Context, LastSyncedDevice.hDevice); - URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, 0, - nullptr, nullptr); + URes = msan::urEnqueueUSMMemcpy(Queue, true, HostAllocation, + LastSyncedDevice.MemHandle, Size, 0, + nullptr, nullptr); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to migrate memory buffer data"); @@ -189,8 +169,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { // Sync data back to device { 
ManagedQueue Queue(Context, Device); - URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, true, Allocation, HostAllocation, Size, 0, nullptr, nullptr); + URes = msan::urEnqueueUSMMemcpy(Queue, true, Allocation, HostAllocation, + Size, 0, nullptr, nullptr); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to migrate memory buffer data"); @@ -206,7 +186,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_result_t MemBuffer::free() { for (const auto &[_, Ptr] : Allocations) { - ur_result_t URes = getContext()->urDdiTable.USM.pfnFree(Context, Ptr); + ur_result_t URes = msan::urUSMFree(Context, Ptr); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to free buffer handle {}", (void *)Ptr); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index b40ce477df1ec..66000a4921b45 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -108,7 +108,7 @@ ur_result_t urUSMDeviceAlloc( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMHostAlloc -ur_result_t UR_APICALL urUSMHostAlloc( +ur_result_t urUSMHostAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object const ur_usm_desc_t *pUSMDesc, ///< [in][optional] USM memory allocation descriptor @@ -126,7 +126,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMSharedAlloc -ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t *pUSMDesc, ///< [in][optional] Pointer to USM memory @@ 
-145,7 +145,7 @@ ur_result_t UR_APICALL urUSMSharedAlloc( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMFree -ur_result_t UR_APICALL urUSMFree( +ur_result_t urUSMFree( /// [in] handle of the context object ur_context_handle_t hContext, /// [in] pointer to USM memory object @@ -1422,7 +1422,7 @@ ur_result_t urKernelSetArgMemObj( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urKernelSetArgLocal -__urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal( +__urdlllocal ur_result_t urKernelSetArgLocal( /// [in] handle of the kernel object ur_kernel_handle_t hKernel, /// [in] argument index in range [0, num args - 1] @@ -1448,7 +1448,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMFill -ur_result_t UR_APICALL urEnqueueUSMFill( +ur_result_t urEnqueueUSMFill( /// [in] handle of the queue object ur_queue_handle_t hQueue, /// [in][bounds(0, size)] pointer to USM memory object @@ -1481,11 +1481,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill( Events.push_back(Event); { - ur_context_handle_t hContext = GetContext(hQueue); - ur_device_handle_t hDevice = GetUSMAllocDevice(hContext, pMem); + ur_device_handle_t hDevice = GetUSMAllocDevice(hQueue, pMem); assert(hDevice); const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(hDevice); - const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); + uptr MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); ur_event_handle_t Event = nullptr; UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, (char)0, size, 0, @@ -1508,7 +1507,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMMemcpy -ur_result_t UR_APICALL 
urEnqueueUSMMemcpy( +ur_result_t urEnqueueUSMMemcpy( /// [in] handle of the queue object ur_queue_handle_t hQueue, /// [in] blocking or non-blocking copy @@ -1544,8 +1543,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( bool IsDstUSM = IsUSM(hContext, pDst); if (IsSrcUSM && IsDstUSM) { - ur_device_handle_t SrcDevice = GetUSMAllocDevice(hContext, pSrc); - ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + ur_device_handle_t SrcDevice = GetUSMAllocDevice(hQueue, pSrc); + ur_device_handle_t DstDevice = GetUSMAllocDevice(hQueue, pDst); assert(SrcDevice && DstDevice); const auto SrcDI = getMsanInterceptor()->getDeviceInfo(SrcDevice); const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); @@ -1574,7 +1573,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( } else if (IsDstUSM) { // FIXME: Assume host memory is always initialized memory, but the better // way may enable host-side Msan as well - ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + ur_device_handle_t DstDevice = GetUSMAllocDevice(hQueue, pDst); assert(DstDevice); const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); { @@ -1599,7 +1598,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMFill2D -ur_result_t UR_APICALL urEnqueueUSMFill2D( +ur_result_t urEnqueueUSMFill2D( /// [in] handle of the queue to submit to. ur_queue_handle_t hQueue, /// [in][bounds(0, pitch * height)] pointer to memory to be filled. 
@@ -1637,8 +1636,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( Events.push_back(Event); { - ur_context_handle_t hContext = GetContext(hQueue); - ur_device_handle_t hDevice = GetUSMAllocDevice(hContext, pMem); + ur_device_handle_t hDevice = GetUSMAllocDevice(hQueue, pMem); assert(hDevice); const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(hDevice); const auto MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); @@ -1665,7 +1663,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMMemcpy2D -ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( +ur_result_t urEnqueueUSMMemcpy2D( /// [in] handle of the queue to submit to. ur_queue_handle_t hQueue, /// [in] indicates if this operation should block the host. @@ -1709,8 +1707,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( bool IsDstUSM = IsUSM(hContext, pDst); if (IsSrcUSM && IsDstUSM) { - ur_device_handle_t SrcDevice = GetUSMAllocDevice(hContext, pSrc); - ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + ur_device_handle_t SrcDevice = GetUSMAllocDevice(hQueue, pSrc); + ur_device_handle_t DstDevice = GetUSMAllocDevice(hQueue, pDst); assert(SrcDevice && DstDevice); const auto SrcDI = getMsanInterceptor()->getDeviceInfo(SrcDevice); const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); @@ -1752,7 +1750,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } else if (IsDstUSM) { // FIXME: Assume host memory is always initialized memory, but the better // way may enable host-side Msan as well - ur_device_handle_t DstDevice = GetUSMAllocDevice(hContext, pDst); + ur_device_handle_t DstDevice = GetUSMAllocDevice(hQueue, pDst); assert(DstDevice); const auto DstDI = getMsanInterceptor()->getDeviceInfo(DstDevice); const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.hpp 
b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.hpp index a4bafc7022672..19da591b03fc9 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.hpp @@ -15,6 +15,49 @@ namespace ur_sanitizer_layer { +namespace msan { + +ur_result_t urUSMDeviceAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); + +ur_result_t urUSMHostAlloc(ur_context_handle_t hContext, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); + +ur_result_t urUSMFree(ur_context_handle_t hContext, void *pMem); + +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + +} // namespace msan + void initMsanInterceptor(); void destroyMsanInterceptor(); diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp 
index ce21b5496616f..928bc60ebbb25 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -16,6 +16,20 @@ namespace ur_sanitizer_layer { +namespace { + +ur_usm_type_t GetUSMType(ur_context_handle_t Context, const void *MemPtr) { + ur_usm_type_t USMType = UR_USM_TYPE_UNKNOWN; + [[maybe_unused]] auto Result = + getContext()->urDdiTable.USM.pfnGetMemAllocInfo( + Context, MemPtr, UR_USM_ALLOC_INFO_TYPE, sizeof(USMType), &USMType, + nullptr); + assert(Result == UR_RESULT_SUCCESS); + return USMType; +} + +} // namespace + ManagedQueue::ManagedQueue(ur_context_handle_t Context, ur_device_handle_t Device) { [[maybe_unused]] auto Result = getContext()->urDdiTable.Queue.pfnCreate( @@ -124,24 +138,33 @@ std::string GetKernelName(ur_kernel_handle_t Kernel) { } bool IsUSM(ur_context_handle_t Context, const void *MemPtr) { - ur_usm_type_t USMType = UR_USM_TYPE_UNKNOWN; - auto Result = getContext()->urDdiTable.USM.pfnGetMemAllocInfo( - Context, MemPtr, UR_USM_ALLOC_INFO_TYPE, sizeof(USMType), &USMType, - nullptr); - assert(Result == UR_RESULT_SUCCESS); + ur_usm_type_t USMType = GetUSMType(Context, MemPtr); return USMType != UR_USM_TYPE_UNKNOWN; } +bool IsHostUSM(ur_context_handle_t Context, const void *MemPtr) { + ur_usm_type_t USMType = GetUSMType(Context, MemPtr); + return USMType == UR_USM_TYPE_HOST; +} + ur_device_handle_t GetUSMAllocDevice(ur_context_handle_t Context, const void *MemPtr) { + assert(IsUSM(Context, MemPtr)); ur_device_handle_t Device{}; - // if urGetMemAllocInfo failed, return nullptr getContext()->urDdiTable.USM.pfnGetMemAllocInfo( Context, MemPtr, UR_USM_ALLOC_INFO_DEVICE, sizeof(Device), &Device, nullptr); return Device; } +ur_device_handle_t GetUSMAllocDevice(ur_queue_handle_t Queue, + const void *MemPtr) { + ur_context_handle_t Context = GetContext(Queue); + assert(Context && IsUSM(Context, MemPtr)); + return 
IsHostUSM(Context, MemPtr) ? GetDevice(Queue) + : GetUSMAllocDevice(Context, MemPtr); +} + DeviceType GetDeviceType(ur_context_handle_t Context, ur_device_handle_t Device) { ur_device_type_t DeviceType = UR_DEVICE_TYPE_DEFAULT; diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 9291d1bcc3dac..b989569d94699 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -51,8 +51,13 @@ std::string GetKernelName(ur_kernel_handle_t Kernel); size_t GetDeviceLocalMemorySize(ur_device_handle_t Device); ur_program_handle_t GetProgram(ur_kernel_handle_t Kernel); bool IsUSM(ur_context_handle_t Context, const void *MemPtr); +bool IsHostUSM(ur_context_handle_t Context, const void *MemPtr); ur_device_handle_t GetUSMAllocDevice(ur_context_handle_t Context, const void *MemPtr); +// Get the device of MemPtr. 
If MemPtr is host USM, then return the device +// of Queue +ur_device_handle_t GetUSMAllocDevice(ur_queue_handle_t Queue, + const void *MemPtr); uint32_t GetKernelNumArgs(ur_kernel_handle_t Kernel); size_t GetKernelLocalMemorySize(ur_kernel_handle_t Kernel, ur_device_handle_t Device); From ee4c0a2d31f2672c7625073ed711ad33a98ee908 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 20 Jun 2025 12:05:59 +0200 Subject: [PATCH 25/26] rename EnqueueUSMBlockingSet --- .../loader/layers/sanitizer/asan/asan_shadow.cpp | 15 +++++++-------- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 8 ++++---- .../layers/sanitizer/msan/msan_interceptor.cpp | 6 +++--- .../loader/layers/sanitizer/msan/msan_shadow.cpp | 14 +++++++------- .../sanitizer_common/sanitizer_utils.hpp | 9 ++++----- .../loader/layers/sanitizer/tsan/tsan_shadow.cpp | 10 ++++------ 6 files changed, 29 insertions(+), 33 deletions(-) diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 8c8e26639f4a3..ce7c07591847a 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -210,8 +210,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)MappedPtr, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = - EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, (char)0, PageSize); + URes = EnqueueUSMSet(Queue, (void *)MappedPtr, (char)0, PageSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); @@ -223,8 +222,8 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, } } - auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, - ShadowEnd - ShadowBegin + 1); + auto URes = EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, + ShadowEnd - ShadowBegin + 1); if (URes != UR_RESULT_SUCCESS) { 
UR_LOG_L(getContext()->logger, ERR, @@ -260,8 +259,8 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet(Queue, (void *)LocalShadowOffset, - (char)0, RequiredShadowSize); + ur_result_t URes = EnqueueUSMSet(Queue, (void *)LocalShadowOffset, (char)0, + RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); @@ -311,8 +310,8 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (void **)&PrivateShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet( - Queue, (void *)PrivateShadowOffset, (char)0, RequiredShadowSize); + ur_result_t URes = EnqueueUSMSet(Queue, (void *)PrivateShadowOffset, + (char)0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)PrivateShadowOffset)); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 66000a4921b45..dc5e5ab77034d 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1487,8 +1487,8 @@ ur_result_t urEnqueueUSMFill( uptr MemShadow = DeviceInfo->Shadow->MemToShadow((uptr)pMem); ur_event_handle_t Event = nullptr; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, (char)0, size, 0, - nullptr, &Event)); + UR_CALL(EnqueueUSMSet(hQueue, (void *)MemShadow, (char)0, size, 0, nullptr, + &Event)); Events.push_back(Event); } @@ -1579,8 +1579,8 @@ ur_result_t urEnqueueUSMMemcpy( { const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); ur_event_handle_t Event = nullptr; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, (char)0, size, 0, - nullptr, &Event)); + UR_CALL(EnqueueUSMSet(hQueue, (void *)DstShadow, 
(char)0, size, 0, + nullptr, &Event)); Events.push_back(Event); } } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 6a32120b67b43..a1ffb2f102859 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -500,9 +500,9 @@ ur_result_t MsanInterceptor::prepareLaunch( ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, ContextInfo->CleanShadowSize, (void **)&LaunchInfo.Data.Host.CleanShadow)); - UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow, - (char)0, ContextInfo->CleanShadowSize, 0, - nullptr, nullptr)); + UR_CALL(EnqueueUSMSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow, + (char)0, ContextInfo->CleanShadowSize, 0, nullptr, + nullptr)); if (LaunchInfo.LocalWorkSize.empty()) { LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 2bd69710c57d9..fc9196094c74c 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -315,9 +315,9 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value); - UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, - ShadowEnd - ShadowBegin + 1, Events.size(), - Events.data(), OutEvent)); + UR_CALL(EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, + ShadowEnd - ShadowBegin + 1, Events.size(), + Events.data(), OutEvent)); } { @@ -386,8 +386,8 @@ ur_result_t MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet(Queue, (void 
*)LocalShadowOffset, - (char)0, RequiredShadowSize); + ur_result_t URes = EnqueueUSMSet(Queue, (void *)LocalShadowOffset, (char)0, + RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); @@ -434,8 +434,8 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (void **)&PrivateShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet( - Queue, (void *)PrivateShadowOffset, (char)0, RequiredShadowSize); + ur_result_t URes = EnqueueUSMSet(Queue, (void *)PrivateShadowOffset, + (char)0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)PrivateShadowOffset)); diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index b989569d94699..607267ec6c4ef 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -67,11 +67,10 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); template -ur_result_t -EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, T Value, size_t Size, - uint32_t NumEvents = 0, - const ur_event_handle_t *EventWaitList = nullptr, - ur_event_handle_t *OutEvent = nullptr) { +ur_result_t EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, T Value, + size_t Size, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) { assert(Size % sizeof(T) == 0); return getContext()->urDdiTable.Enqueue.pfnUSMFill( Queue, Ptr, sizeof(T), &Value, Size, NumEvents, EventWaitList, OutEvent); diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp 
b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp index 0c27fd4a5a86c..966df3f202863 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp @@ -170,8 +170,7 @@ ur_result_t ShadowMemoryGPU::CleanShadow(ur_queue_handle_t Queue, uptr Ptr, (void *)MappedPtr, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = - EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, (char)0, PageSize); + URes = EnqueueUSMSet(Queue, (void *)MappedPtr, (char)0, PageSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); @@ -183,8 +182,7 @@ ur_result_t ShadowMemoryGPU::CleanShadow(ur_queue_handle_t Queue, uptr Ptr, } } - auto URes = - EnqueueUSMBlockingSet(Queue, (void *)Begin, (char)0, + auto URes = EnqueueUSMSet(Queue, (void *)Begin, (char)0, Size / kShadowCell * kShadowCnt * kShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); @@ -216,8 +214,8 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet(Queue, (void *)LocalShadowOffset, - 0, RequiredShadowSize); + ur_result_t URes = + EnqueueUSMSet(Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); From 08373e90876e31dad9334487aaf30999b8bb6d2c Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Sat, 21 Jun 2025 04:48:07 +0200 Subject: [PATCH 26/26] fix failures --- clang/lib/Driver/SanitizerArgs.cpp | 7 +++++-- clang/test/Driver/sycl-device-sanitizer.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index d243d4b6ace1f..b49c6a71bb83b 100644 --- 
a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include "clang/Driver/SanitizerArgs.h" +#include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/Sanitizers.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Options.h" @@ -1319,8 +1320,10 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, CmdArgs.push_back("-msan-poison-stack-with-call=1"); if (MsanTrackOrigins) { - assert(MsanTrackOrigins == 1 && - "Only support -fsanitize-memory-track-origins=1"); + // FIXME: Support enhanced origins tracking in device offloading. + if (MsanTrackOrigins != 1) + TC.getDriver().Diag(diag::err_drv_argument_only_allowed_with) + << "-fsanitize-memory-track-origins=1"; CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-msan-track-origins=" + Twine(MsanTrackOrigins))); diff --git a/clang/test/Driver/sycl-device-sanitizer.cpp b/clang/test/Driver/sycl-device-sanitizer.cpp index 91054319bdb81..cc50c298ce09e 100644 --- a/clang/test/Driver/sycl-device-sanitizer.cpp +++ b/clang/test/Driver/sycl-device-sanitizer.cpp @@ -49,10 +49,10 @@ // SYCL-MSAN-SAME: "-mllvm" "-msan-eager-checks=1" // RUN: %clangxx -fsycl -fsanitize=memory -fsanitize-memory-track-origins=1 -c %s -### 2>&1 \ -// RUN: | FileCheck --check-prefix=SYCL-MSAN %s -// SYCL-MSAN: clang{{.*}} "-fsycl-is-device" -// SYCL-MSAN-SAME: -fsanitize=memory -// SYCL-MSAN-SAME: "-mllvm" "-msan-track-origins=1" +// RUN: | FileCheck --check-prefix=SYCL-MSAN-ORIGIN %s +// SYCL-MSAN-ORIGIN: clang{{.*}} "-fsycl-is-device" +// SYCL-MSAN-ORIGIN-SAME: -fsanitize=memory +// SYCL-MSAN-ORIGIN-SAME: "-mllvm" "-msan-track-origins=1" // RUN: %clangxx -fsycl -Xarch_device -fsanitize=memory -c %s -### 2>&1 \ // RUN: | FileCheck --check-prefix=SYCL-MSAN-XARCH-DEVICE %s