diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index a90be4f6235e6..589d67a191544 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -890,6 +890,7 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF) option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON) option (LLVM_ENABLE_BINDINGS "Build bindings." ON) option (LLVM_ENABLE_TELEMETRY "Enable the telemetry library. If set to OFF, library cannot be enabled after build (eg., at runtime)" ON) +option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ON) set(LLVM_INSTALL_DOXYGEN_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html" CACHE STRING "Doxygen-generated HTML documentation install directory") diff --git a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h b/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h new file mode 100644 index 0000000000000..a47fd16209b6b --- /dev/null +++ b/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h @@ -0,0 +1,127 @@ +//===- MappedFileRegionBumpPtr.h --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for MappedFileRegionBumpPtr, a bump pointer +/// allocator, backed by a memory-mapped file. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H +#define LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H + +#include "llvm/Support/Alignment.h" +#include "llvm/Support/FileSystem.h" +#include + +namespace llvm::cas { + +/// Allocator for an owned mapped file region that supports thread-safe and +/// process-safe bump pointer allocation. 
+/// +/// This allocator is designed to create a sparse file when supported by the +/// filesystem's \c ftruncate so that it can be used with a large maximum size. +/// It will also attempt to shrink the underlying file down to its current +/// allocation size when the last concurrent mapping is closed. +/// +/// Process-safe. Uses file locks when resizing the file during initialization +/// and destruction. +/// +/// Thread-safe, assuming all threads use the same instance to talk to a given +/// file/mapping. Unsafe to have multiple instances talking to the same file +/// in the same process since file locks will misbehave. Clients should +/// coordinate (somehow). +/// +/// Provides 8-byte alignment for all allocations. +class MappedFileRegionBumpPtr { +public: + using RegionT = sys::fs::mapped_file_region; + + /// Header for MappedFileRegionBumpPtr. It can be configured to be located + /// at any location within the file and the allocation will be appended after + /// the header. + struct Header { + std::atomic BumpPtr; + std::atomic AllocatedSize; + }; + + /// Create a \c MappedFileRegionBumpPtr. + /// + /// \param Path the path to open the mapped region. + /// \param Capacity the maximum size for the mapped file region. + /// \param HeaderOffset the offset at which to store the header. This is so + /// that information can be stored before the header, like a file magic. + /// \param NewFileConstructor is for constructing new files. It has exclusive + /// access to the file. Must call \c initializeHeader. + static Expected + create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, + function_ref NewFileConstructor); + + /// Finish initializing the header. Must be called by \c NewFileConstructor. + void initializeHeader(uint64_t HeaderOffset); + + /// Minimum alignment for allocations, currently hardcoded to 8B. + static constexpr Align getAlign() { + // Trick Align into giving us '8' as a constexpr. 
+ struct alignas(8) T {}; + static_assert(alignof(T) == 8, "Tautology failed?"); + return Align::Of(); + } + + /// Allocate at least \p AllocSize. Rounds up to \a getAlign(). + Expected allocate(uint64_t AllocSize) { + auto Offset = allocateOffset(AllocSize); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + return data() + *Offset; + } + /// Allocate, returning the offset from \a data() instead of a pointer. + Expected allocateOffset(uint64_t AllocSize); + + char *data() const { return Region.data(); } + uint64_t size() const { return H->BumpPtr; } + uint64_t capacity() const { return Region.size(); } + + RegionT &getRegion() { return Region; } + + ~MappedFileRegionBumpPtr() { destroyImpl(); } + + MappedFileRegionBumpPtr() = default; + MappedFileRegionBumpPtr(MappedFileRegionBumpPtr &&RHS) { moveImpl(RHS); } + MappedFileRegionBumpPtr &operator=(MappedFileRegionBumpPtr &&RHS) { + destroyImpl(); + moveImpl(RHS); + return *this; + } + + MappedFileRegionBumpPtr(const MappedFileRegionBumpPtr &) = delete; + MappedFileRegionBumpPtr &operator=(const MappedFileRegionBumpPtr &) = delete; + +private: + void destroyImpl(); + void moveImpl(MappedFileRegionBumpPtr &RHS) { + std::swap(Region, RHS.Region); + std::swap(H, RHS.H); + std::swap(Path, RHS.Path); + std::swap(FD, RHS.FD); + std::swap(SharedLockFD, RHS.SharedLockFD); + } + +private: + RegionT Region; + Header *H = nullptr; + std::string Path; + // File descriptor for the main storage file. + std::optional FD; + // File descriptor for the file used as reader/writer lock. + std::optional SharedLockFD; +}; + +} // namespace llvm::cas + +#endif // LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 39136bc45c292..2a5c580f336f2 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -146,4 +146,7 @@ coverage bugs, and to 0 otherwise. 
*/ #cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */ +#cmakedefine01 LLVM_ENABLE_ONDISK_CAS + #endif diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h index a21b0a272d2b0..c203779307840 100644 --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -410,6 +410,11 @@ LLVM_ABI std::error_code copy_file(const Twine &From, int ToFD); /// platform-specific error_code. LLVM_ABI std::error_code resize_file(int FD, uint64_t Size); +/// Resize \p FD to \p Size with sparse files explicitly enabled. It uses +/// FSCTL_SET_SPARSE on Windows. This is the same as resize_file on +/// non-Windows. +LLVM_ABI std::error_code resize_file_sparse(int FD, uint64_t Size); + /// Resize \p FD to \p Size before mapping \a mapped_file_region::readwrite. On /// non-Windows, this calls \a resize_file(). On Windows, this is a no-op, /// since the subsequent mapping (via \c CreateFileMapping) automatically diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index f3d2b41c704bc..2f1ed1d7081aa 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -3,7 +3,9 @@ add_llvm_component_library(LLVMCAS ActionCaches.cpp BuiltinCAS.cpp InMemoryCAS.cpp + MappedFileRegionBumpPtr.cpp ObjectStore.cpp + OnDiskCommon.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS diff --git a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp new file mode 100644 index 0000000000000..8788d446c0430 --- /dev/null +++ b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp @@ -0,0 +1,321 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file Implements MappedFileRegionBumpPtr. +/// +/// A bump pointer allocator, backed by a memory-mapped file. +/// +/// The effect we want is: +/// +/// 1. If it doesn't exist, create the file with an initial size. +/// 2. Reserve virtual memory large enough for the max file size. +/// 3. Map the file into memory in the reserved region. +/// 4. Increase the file size and update the mapping when necessary. +/// +/// However, updating the mapping is challenging when it needs to work portably, +/// and across multiple processes without locking for every read. Our current +/// implementation strategy is: +/// +/// 1. Use \c sys::fs::resize_file_sparse to grow the file to its max size +/// (typically several GB). If the file system doesn't support sparse files, +/// this may return a fully allocated file. +/// 2. Call \c sys::fs::mapped_file_region to map the entire file. +/// 3. [Automatic as part of 2.] +/// 4. If supported, use \c fallocate or similar APIs to ensure the file system +/// storage for the sparse file so we won't end up with a partial file if the +/// disk is out of space. +/// +/// Additionally, we attempt to resize the file to its actual data size when +/// closing the mapping, if this is the only concurrent instance. This is done +/// using file locks. Shrinking the file mitigates problems with having large +/// files: on filesystems without sparse files it avoids unnecessary space use; +/// it also avoids allocating the full size if another process copies the file, +/// which typically loses sparseness. These mitigations only work while the file +/// is not in use. +/// +/// If different values of the capacity are used for concurrent users of the same +/// mapping, the actual capacity will be the largest value requested at the time +/// of the creation. 
As a result, the mapped region in one process can be +/// smaller than the size of the file on disk and can run out of reserved space +/// when the file still has space. It is highly recommended to use the same +/// capacity for all the concurrent users of the same instance of +/// MappedFileRegionBumpPtr. +/// +/// To support resizing, we use two separate file locks: +/// 1. We use a shared reader lock on a ".shared" file until destruction. +/// 2. We use a lock on the main file during initialization - shared to check +/// the status, upgraded to exclusive to resize/initialize the file. +/// +/// Then during destruction we attempt to get exclusive access on (1), which +/// requires no concurrent readers. If so, we shrink the file. Using two +/// separate locks simplifies the implementation and enables it to work on +/// platforms (e.g. Windows) where a shared/reader lock prevents writing. +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/MappedFileRegionBumpPtr.h" +#include "OnDiskCommon.h" + +#if LLVM_ON_UNIX +#include +#if __has_include() +#include +#endif +#ifdef DEV_BSIZE +#define MAPPED_FILE_BSIZE DEV_BSIZE +#elif __linux__ +#define MAPPED_FILE_BSIZE 512 +#endif +#endif + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +namespace { +struct FileLockRAII { + std::string Path; + int FD; + std::optional Locked; + + FileLockRAII(std::string Path, int FD) : Path(std::move(Path)), FD(FD) {} + ~FileLockRAII() { consumeError(unlock()); } + + Error lock(sys::fs::LockKind LK) { + if (std::error_code EC = lockFileThreadSafe(FD, LK)) + return createFileError(Path, EC); + Locked = LK; + return Error::success(); + } + + Error unlock() { + if (Locked) { + Locked = std::nullopt; + if (std::error_code EC = unlockFileThreadSafe(FD)) + return createFileError(Path, EC); + } + return Error::success(); + } +}; + +struct FileSizeInfo { + uint64_t Size; + uint64_t AllocatedSize; + + static 
ErrorOr get(sys::fs::file_t File); +}; +} // end anonymous namespace + +Expected MappedFileRegionBumpPtr::create( + const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, + function_ref NewFileConstructor) { + MappedFileRegionBumpPtr Result; + Result.Path = Path.str(); + // Open the main file. + int FD; + if (std::error_code EC = sys::fs::openFileForReadWrite( + Result.Path, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) + return createFileError(Path, EC); + Result.FD = FD; + + // Open the shared lock file. See file comment for details of locking scheme. + SmallString<128> SharedLockPath(Result.Path); + SharedLockPath.append(".shared"); + int SharedLockFD; + if (std::error_code EC = sys::fs::openFileForReadWrite( + SharedLockPath, SharedLockFD, sys::fs::CD_OpenAlways, + sys::fs::OF_None)) + return createFileError(SharedLockPath, EC); + Result.SharedLockFD = SharedLockFD; + + // Take shared/reader lock on the lock file, held until we close the file; + // unlocked by destroyImpl. Failures are reported against the lock file path. + if (std::error_code EC = + lockFileThreadSafe(SharedLockFD, sys::fs::LockKind::Shared)) + return createFileError(SharedLockPath, EC); + + // Take shared/reader lock for initialization. + FileLockRAII InitLock(Result.Path, FD); + if (Error E = InitLock.lock(sys::fs::LockKind::Shared)) + return std::move(E); + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + auto FileSize = FileSizeInfo::get(File); + if (!FileSize) + return createFileError(Result.Path, FileSize.getError()); + + if (FileSize->Size < Capacity) { + // Lock the file exclusively so only one process will do the initialization. + if (Error E = InitLock.unlock()) + return std::move(E); + if (Error E = InitLock.lock(sys::fs::LockKind::Exclusive)) + return std::move(E); + // Retrieve the current size now that we have exclusive access. 
+ FileSize = FileSizeInfo::get(File); + if (!FileSize) + return createFileError(Result.Path, FileSize.getError()); + } + + // At this point either the file is still under-sized, or we have the size for + // the completely initialized file. + + if (FileSize->Size < Capacity) { + // We are initializing the file; it may be empty, or may have been shrunk + // during a previous close. + // TODO: Detect a case where someone opened it with a smaller capacity. + assert(InitLock.Locked == sys::fs::LockKind::Exclusive); + if (std::error_code EC = sys::fs::resize_file_sparse(FD, Capacity)) + return createFileError(Result.Path, EC); + } else { + // Someone else initialized it. + Capacity = FileSize->Size; + } + + // Create the mapped region. + { + std::error_code EC; + sys::fs::mapped_file_region Map( + File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC); + if (EC) + return createFileError(Result.Path, EC); + Result.Region = std::move(Map); + } + + if (FileSize->Size == 0) { + assert(InitLock.Locked == sys::fs::LockKind::Exclusive); + // We are creating a new file; run the constructor. + if (Error E = NewFileConstructor(Result)) + return std::move(E); + } else { + Result.initializeHeader(HeaderOffset); + } + + if (FileSize->Size < Capacity && FileSize->AllocatedSize < Capacity) { + // We are initializing the file; sync the allocated size in case it + // changed when truncating or during construction. + FileSize = FileSizeInfo::get(File); + if (!FileSize) + return createFileError(Result.Path, FileSize.getError()); + assert(InitLock.Locked == sys::fs::LockKind::Exclusive); + Result.H->AllocatedSize.exchange(FileSize->AllocatedSize); + } + + return Result; +} + +void MappedFileRegionBumpPtr::destroyImpl() { + if (!FD) + return; + + // Drop the shared lock indicating we are no longer accessing the file. + if (SharedLockFD) + (void)unlockFileThreadSafe(*SharedLockFD); + + // Attempt to truncate the file if we can get exclusive access. Ignore any + // errors. 
+ if (H) { + assert(SharedLockFD && "Must have shared lock file open"); + if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) { + size_t Size = size(); + size_t Capacity = capacity(); + assert(Size <= Capacity); + // sync to file system to make sure all contents are up-to-date. + (void)Region.sync(); + // unmap the file before resizing since that is the requirement for + // some platforms. + Region.unmap(); + (void)sys::fs::resize_file(*FD, Size); + (void)unlockFileThreadSafe(*SharedLockFD); + } + } + + auto Close = [](std::optional &FD) { + if (FD) { + sys::fs::file_t File = sys::fs::convertFDToNativeFile(*FD); + sys::fs::closeFile(File); + FD = std::nullopt; + } + }; + + // Close the file and shared lock. + Close(FD); + Close(SharedLockFD); +} + +void MappedFileRegionBumpPtr::initializeHeader(uint64_t HeaderOffset) { + assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t"); + uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H)); + assert(HeaderEndOffset <= capacity() && + "Expected end offset to be pre-allocated"); + assert(isAligned(Align::Of(), HeaderOffset) && + "Expected header offset to be aligned"); + H = reinterpret_cast(data() + HeaderOffset); + + uint64_t ExistingValue = 0; + if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset)) + assert(ExistingValue >= HeaderEndOffset && + "Expected 0, or past the end of the BumpPtr itself"); +} + +static Error createAllocatorOutOfSpaceError() { + return createStringError(std::make_error_code(std::errc::not_enough_memory), + "memory mapped file allocator is out of space"); +} + +Expected MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) { + AllocSize = alignTo(AllocSize, getAlign()); + uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize); + uint64_t NewEnd = OldEnd + AllocSize; + if (LLVM_UNLIKELY(NewEnd > capacity())) { + // Return the allocation. 
If the start already passed the end, that means + // some other concurrent allocations already consumed all the capacity. + // There is no need to return the original value. If the start was not + // past the end, current allocation certainly bumped it past the end. + // All other allocations afterwards must have failed and current allocation + // is in charge of returning the allocation back to a valid value. + if (OldEnd <= capacity()) + (void)H->BumpPtr.exchange(OldEnd); + + return createAllocatorOutOfSpaceError(); + } + + uint64_t DiskSize = H->AllocatedSize; + if (LLVM_UNLIKELY(NewEnd > DiskSize)) { + uint64_t NewSize; + // The minimum increment is a page, but allocate more to amortize the cost. + constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB + if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment) + .moveInto(NewSize)) + return std::move(E); + assert(NewSize >= DiskSize + Increment); + // FIXME: on Darwin this can under-count the size if there is a race to + // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes + // to the end of the file, not to allocate up to a fixed size. + // Any discrepancy will be resolved the next time the file is truncated and + // then reopened. + while (DiskSize < NewSize) + H->AllocatedSize.compare_exchange_strong(DiskSize, NewSize); + } + return OldEnd; +} + +ErrorOr FileSizeInfo::get(sys::fs::file_t File) { +#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE) + struct stat Status; + int StatRet = ::fstat(File, &Status); + if (StatRet) + return errnoAsErrorCode(); + uint64_t AllocatedSize = uint64_t(Status.st_blocks) * MAPPED_FILE_BSIZE; + return FileSizeInfo{uint64_t(Status.st_size), AllocatedSize}; +#else + // Fallback: assume the file is fully allocated. Note: this may result in + // data loss on out-of-space. 
+ sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(File, Status)) + return EC; + return FileSizeInfo{Status.getSize(), Status.getSize()}; +#endif +} diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp new file mode 100644 index 0000000000000..89b00398d820f --- /dev/null +++ b/llvm/lib/CAS/OnDiskCommon.cpp @@ -0,0 +1,120 @@ +//===- OnDiskCommon.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OnDiskCommon.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include + +#if __has_include() +#include +#ifdef LOCK_SH +#define HAVE_FLOCK 1 +#else +#define HAVE_FLOCK 0 +#endif +#endif + +#if __has_include() +#include +#endif + +using namespace llvm; + +std::error_code cas::ondisk::lockFileThreadSafe(int FD, + sys::fs::LockKind Kind) { +#if HAVE_FLOCK + if (flock(FD, Kind == sys::fs::LockKind::Exclusive ? LOCK_EX : LOCK_SH) == 0) + return std::error_code(); + return std::error_code(errno, std::generic_category()); +#elif defined(_WIN32) + // On Windows this implementation is thread-safe. + return sys::fs::lockFile(FD, Kind); +#else + return make_error_code(std::errc::no_lock_available); +#endif +} + +std::error_code cas::ondisk::unlockFileThreadSafe(int FD) { +#if HAVE_FLOCK + if (flock(FD, LOCK_UN) == 0) + return std::error_code(); + return std::error_code(errno, std::generic_category()); +#elif defined(_WIN32) + // On Windows this implementation is thread-safe. 
+ return sys::fs::unlockFile(FD); +#else + return make_error_code(std::errc::no_lock_available); +#endif +} + +std::error_code +cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout, + sys::fs::LockKind Kind) { +#if HAVE_FLOCK + auto Start = std::chrono::steady_clock::now(); + auto End = Start + Timeout; + do { + if (flock(FD, (Kind == sys::fs::LockKind::Exclusive ? LOCK_EX : LOCK_SH) | + LOCK_NB) == 0) + return std::error_code(); + int Error = errno; + if (Error == EWOULDBLOCK) { + // Match sys::fs::tryLockFile, which sleeps for 1 ms per attempt. + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } + return std::error_code(Error, std::generic_category()); + } while (std::chrono::steady_clock::now() < End); + return make_error_code(std::errc::no_lock_available); +#elif defined(_WIN32) + // On Windows this implementation is thread-safe. + return sys::fs::tryLockFile(FD, Timeout, Kind); +#else + return make_error_code(std::errc::no_lock_available); +#endif +} + +Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, + size_t NewSize) { + auto CreateError = [&](std::error_code EC) -> Expected { + if (EC == std::errc::not_supported) + // Ignore ENOTSUP in case the filesystem cannot preallocate. + return NewSize; +#if defined(HAVE_POSIX_FALLOCATE) + if (EC == std::errc::invalid_argument && CurrentSize < NewSize && // len > 0 + NewSize < std::numeric_limits::max()) // 0 <= offset, len < max + // Prior to 2024, POSIX required EINVAL for cases that should be ENOTSUP, + // so handle it the same as above if it is not one of the other ways to + // get EINVAL. + return NewSize; +#endif + return createStringError(EC, + "failed to allocate to CAS file: " + EC.message()); + }; +#if defined(HAVE_POSIX_FALLOCATE) + // Note: posix_fallocate returns its error directly, not via errno. 
+ if (int Err = posix_fallocate(FD, CurrentSize, NewSize - CurrentSize)) + return CreateError(std::error_code(Err, std::generic_category())); + return NewSize; +#elif defined(__APPLE__) + fstore_t FAlloc; + FAlloc.fst_flags = F_ALLOCATEALL | F_ALLOCATEPERSIST; + FAlloc.fst_posmode = F_PEOFPOSMODE; + FAlloc.fst_offset = 0; + FAlloc.fst_length = NewSize - CurrentSize; + FAlloc.fst_bytesalloc = 0; + if (fcntl(FD, F_PREALLOCATE, &FAlloc)) + return CreateError(errnoAsErrorCode()); + assert(CurrentSize + FAlloc.fst_bytesalloc >= NewSize); + return CurrentSize + FAlloc.fst_bytesalloc; +#else + return NewSize; // Pretend it worked. +#endif +} diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h new file mode 100644 index 0000000000000..8b79ffe5c3158 --- /dev/null +++ b/llvm/lib/CAS/OnDiskCommon.h @@ -0,0 +1,46 @@ +//===- OnDiskCommon.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CAS_ONDISKCOMMON_H +#define LLVM_LIB_CAS_ONDISKCOMMON_H + +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include + +namespace llvm::cas::ondisk { + +/// Thread-safe alternative to \c sys::fs::lockFile. This does not support all +/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library +/// for now. +std::error_code lockFileThreadSafe(int FD, llvm::sys::fs::LockKind Kind); + +/// Thread-safe alternative to \c sys::fs::unlockFile. This does not support all +/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library +/// for now. +std::error_code unlockFileThreadSafe(int FD); + +/// Thread-safe alternative to \c sys::fs::tryLockFile. 
This does not support +/// all the platforms that \c sys::fs::lockFile does, so keep it in the CAS +/// library for now. +std::error_code tryLockFileThreadSafe( + int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0), + llvm::sys::fs::LockKind Kind = llvm::sys::fs::LockKind::Exclusive); + +/// Allocate space for the file \p FD on disk, if the filesystem supports it. +/// +/// On filesystems that support this operation, this ensures errors such as +/// \c std::errc::no_space_on_device are detected before we write data. +/// +/// \returns the new size of the file, or an \c Error. +Expected preallocateFileTail(int FD, size_t CurrentSize, + size_t NewSize); + +} // namespace llvm::cas::ondisk + +#endif // LLVM_LIB_CAS_ONDISKCOMMON_H diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index 2f563e2899b56..0d991ead72416 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -600,6 +600,11 @@ std::error_code resize_file(int FD, uint64_t Size) { return std::error_code(); } +std::error_code resize_file_sparse(int FD, uint64_t Size) { + // On Unix, this is the same as `resize_file`. 
+ return resize_file(FD, Size); +} + static int convertAccessMode(AccessMode Mode) { switch (Mode) { case AccessMode::Exist: diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 6672d8e0ec777..be007b7abdb51 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -27,6 +27,7 @@ #include "llvm/Support/Windows/WindowsSupport.h" #include #include +#include #undef max @@ -617,6 +618,22 @@ std::error_code resize_file(int FD, uint64_t Size) { return std::error_code(error, std::generic_category()); } +std::error_code resize_file_sparse(int FD, uint64_t Size) { + HANDLE hFile = reinterpret_cast(::_get_osfhandle(FD)); + DWORD temp; + if (!DeviceIoControl(hFile, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, &temp, + NULL)) { + return mapWindowsError(GetLastError()); + } + LARGE_INTEGER liSize; + liSize.QuadPart = Size; + if (!SetFilePointerEx(hFile, liSize, NULL, FILE_BEGIN) || + !SetEndOfFile(hFile)) { + return mapWindowsError(GetLastError()); + } + return std::error_code(); +} + std::error_code access(const Twine &Path, AccessMode Mode) { SmallVector PathUtf16; diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index ff081007f31bc..ab709e30369bf 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -1,3 +1,7 @@ +if (LLVM_ENABLE_ONDISK_CAS) + add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1) +endif() + set(LLVM_LINK_COMPONENTS Support CAS @@ -8,6 +12,7 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp + ProgramTest.cpp ) target_link_libraries(CASTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/CAS/ProgramTest.cpp b/llvm/unittests/CAS/ProgramTest.cpp new file mode 100644 index 0000000000000..c30e0e1dbb44f --- /dev/null +++ b/llvm/unittests/CAS/ProgramTest.cpp @@ -0,0 +1,233 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Program.h" +#include "llvm/CAS/MappedFileRegionBumpPtr.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ExponentialBackoff.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" +#if defined(__APPLE__) +#include +#elif !defined(_MSC_VER) +// Forward declare environ in case it's not provided by stdlib.h. +extern char **environ; +#endif + +using namespace llvm; +using namespace llvm::cas; + +extern const char *TestMainArgv0; +static char ProgramID = 0; + +class CASProgramTest : public testing::Test { + std::vector EnvTable; + std::vector EnvStorage; + +protected: + void SetUp() override { + auto EnvP = [] { +#if defined(_WIN32) + _wgetenv(L"TMP"); // Populate _wenviron, initially is null + return _wenviron; +#elif defined(__APPLE__) + return *_NSGetEnviron(); +#else + return environ; +#endif + }(); + ASSERT_TRUE(EnvP); + + auto prepareEnvVar = [this](decltype(*EnvP) Var) -> StringRef { +#if defined(_WIN32) + // On Windows convert UTF16 encoded variable to UTF8 + auto Len = wcslen(Var); + ArrayRef Ref{reinterpret_cast(Var), + Len * sizeof(*Var)}; + EnvStorage.emplace_back(); + auto convStatus = llvm::convertUTF16ToUTF8String(Ref, EnvStorage.back()); + EXPECT_TRUE(convStatus); + return EnvStorage.back(); +#else + (void)this; + return StringRef(Var); +#endif + }; + + while (*EnvP != nullptr) { + auto S = prepareEnvVar(*EnvP); + if (!StringRef(S).starts_with("GTEST_")) + EnvTable.emplace_back(S); + ++EnvP; + } + } + + void TearDown() override { + EnvTable.clear(); + EnvStorage.clear(); + } + + void addEnvVar(StringRef Var) { 
EnvTable.emplace_back(Var); } + + ArrayRef getEnviron() const { return EnvTable; } +}; + +#if LLVM_ENABLE_ONDISK_CAS + +TEST_F(CASProgramTest, MappedFileRegionBumpPtrTest) { + auto TestAllocator = [](StringRef Path) { + auto NewFileConstructor = [&](MappedFileRegionBumpPtr &Alloc) -> Error { + Alloc.initializeHeader(0); + return Error::success(); + }; + + std::optional Alloc; + ASSERT_THAT_ERROR( + MappedFileRegionBumpPtr::create(Path, /*Capacity=*/10 * 1024 * 1024, + /*HeaderOffset=*/0, NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + std::vector AllocatedPtr; + AllocatedPtr.resize(100); + DefaultThreadPool Threads; + for (unsigned I = 0; I < 100; ++I) { + Threads.async( + [&](unsigned Idx) { + // Allocate a buffer that is larger than needed so allocator hits + // additional pages for test coverage. + unsigned *P = (unsigned *)cantFail(Alloc->allocate(100)); + *P = Idx; + AllocatedPtr[Idx] = P; + }, + I); + } + + Threads.wait(); + for (unsigned I = 0; I < 100; ++I) + EXPECT_EQ(*AllocatedPtr[I], I); + }; + + if (const char *File = getenv("LLVM_CAS_TEST_MAPPED_FILE_REGION")) { + TestAllocator(File); + exit(0); + } + + SmallString<128> FilePath; + sys::fs::createUniqueDirectory("MappedFileRegionBumpPtr", FilePath); + sys::path::append(FilePath, "allocation-file"); + + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &ProgramID); + StringRef Argv[] = { + Executable, "--gtest_filter=CASProgramTest.MappedFileRegionBumpPtrTest"}; + + // Add LLVM_CAS_TEST_MAPPED_FILE_REGION to the environment of the child. 
+ std::string EnvVar = "LLVM_CAS_TEST_MAPPED_FILE_REGION="; + EnvVar += FilePath.str(); + addEnvVar(EnvVar); + + std::string Error; + bool ExecutionFailed; + sys::ProcessInfo PI = sys::ExecuteNoWait(Executable, Argv, getEnviron(), {}, + 0, &Error, &ExecutionFailed); + TestAllocator(FilePath); + + ASSERT_FALSE(ExecutionFailed) << Error; + ASSERT_TRUE(Error.empty()); + ASSERT_NE(PI.Pid, sys::ProcessInfo::InvalidPid) << "Invalid process id"; + llvm::sys::Wait(PI, /*SecondsToWait=*/5, &Error); + ASSERT_TRUE(Error.empty()); + + // Clean up after both processes finish testing. + sys::fs::remove(FilePath); + sys::fs::remove_directories(sys::path::parent_path(FilePath)); +} + +TEST_F(CASProgramTest, MappedFileRegionBumpPtrSizeTest) { + using namespace std::chrono_literals; + auto NewFileConstructor = [&](MappedFileRegionBumpPtr &Alloc) -> Error { + Alloc.initializeHeader(0); + return Error::success(); + }; + + if (const char *File = getenv("LLVM_CAS_TEST_MAPPED_FILE_REGION")) { + ExponentialBackoff Backoff(5s); + do { + if (sys::fs::exists(File)) { + break; + } + } while (Backoff.waitForNextAttempt()); + + std::optional Alloc; + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(File, /*Capacity=*/1024, + /*HeaderOffset=*/0, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + ASSERT_TRUE(Alloc->capacity() == 2048); + + Alloc.reset(); + + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(File, /*Capacity=*/4096, + /*HeaderOffset=*/0, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + ASSERT_TRUE(Alloc->capacity() == 4096); + exit(0); + } + + SmallString<128> FilePath; + sys::fs::createUniqueDirectory("MappedFileRegionBumpPtr", FilePath); + sys::path::append(FilePath, "allocation-file"); + + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &ProgramID); + StringRef Argv[] = { + Executable, + "--gtest_filter=CASProgramTest.MappedFileRegionBumpPtrSizeTest"}; + + // Add LLVM_CAS_TEST_MAPPED_FILE_REGION to the environment of the child. 
+ std::string EnvVar = "LLVM_CAS_TEST_MAPPED_FILE_REGION="; + EnvVar += FilePath.str(); + addEnvVar(EnvVar); + + std::string Error; + bool ExecutionFailed; + sys::ProcessInfo PI = sys::ExecuteNoWait(Executable, Argv, getEnviron(), {}, + 0, &Error, &ExecutionFailed); + + std::optional Alloc; + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(FilePath, /*Capacity=*/2048, + /*HeaderOffset=*/0, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + ASSERT_FALSE(ExecutionFailed) << Error; + ASSERT_TRUE(Error.empty()); + ASSERT_NE(PI.Pid, sys::ProcessInfo::InvalidPid) << "Invalid process id"; + llvm::sys::Wait(PI, /*SecondsToWait=*/100, &Error); + ASSERT_TRUE(Error.empty()); + + // Size is still the requested 2048. + ASSERT_TRUE(Alloc->capacity() == 2048); + + // Clean up after both processes finish testing. + sys::fs::remove(FilePath); + sys::fs::remove_directories(sys::path::parent_path(FilePath)); +} + +#endif // LLVM_ENABLE_ONDISK_CAS