From 148c8c0c81e88c483227f42956e4b4371ac4dc18 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 27 Jun 2025 16:48:42 +0100 Subject: [PATCH] Detours implementation of overriding CRT This commit adds an override to Windows for replacing the CRT malloc/free etc routines. --- .github/workflows/main.yml | 2 +- src/snmalloc/pal/pal_windows.h | 2 + src/windows/CMakeLists.txt | 46 ++++++++ src/windows/README.md | 81 ++++++++++++++ src/windows/override/detours.cc | 156 +++++++++++++++++++++++++++ src/windows/override/detours.h | 11 ++ src/windows/override/lib.cc | 11 ++ src/windows/test/main.cc | 22 ++++ src/windows/vcpkg-configuration.json | 14 +++ src/windows/vcpkg.json | 5 + 10 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 src/windows/CMakeLists.txt create mode 100644 src/windows/README.md create mode 100644 src/windows/override/detours.cc create mode 100644 src/windows/override/detours.h create mode 100644 src/windows/override/lib.cc create mode 100644 src/windows/test/main.cc create mode 100644 src/windows/vcpkg-configuration.json create mode 100644 src/windows/vcpkg.json diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3050f98d7..16e087c6e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -403,7 +403,7 @@ jobs: build-type: [ Release, Debug ] arch: [ Win32, x64 ] toolchain: [ "", "-T ClangCL" ] - extra-cmake-flags: [ "" ] + extra-cmake-flags: [ "", "-S src/windows" ] # Add an extra check for the Windows 8 compatible PAL include: - os: windows-2022 diff --git a/src/snmalloc/pal/pal_windows.h b/src/snmalloc/pal/pal_windows.h index 3b9866d86..886662032 100644 --- a/src/snmalloc/pal/pal_windows.h +++ b/src/snmalloc/pal/pal_windows.h @@ -41,8 +41,10 @@ * be destroyed when the program exits or the DLL is * unloaded. 
*/ +# pragma warning(push) # pragma warning(disable : 4075) # pragma init_seg(".CRT$XCB") +# pragma warning(pop) namespace snmalloc { diff --git a/src/windows/CMakeLists.txt b/src/windows/CMakeLists.txt new file mode 100644 index 000000000..603e08167 --- /dev/null +++ b/src/windows/CMakeLists.txt @@ -0,0 +1,46 @@ +cmake_minimum_required(VERSION 3.14) +include(FetchContent) + +FetchContent_Declare( + vcpkg + GIT_REPOSITORY https://github.com/microsoft/vcpkg + GIT_TAG 2025.06.13 + GIT_SHALLOW TRUE +) + +FetchContent_MakeAvailable(vcpkg) + +set(CMAKE_TOOLCHAIN_FILE + ${vcpkg_SOURCE_DIR}/scripts/buildsystems/vcpkg.cmake + CACHE STRING "vcpkg toolchain file") + +project(snmallocdetours CXX) + +add_subdirectory(../.. src/snmalloc EXCLUDE_FROM_ALL) + +# On Windows, we need to use the detours library to override the malloc +# functions. +find_path(DETOURS_INCLUDE_DIRS "detours/detours.h") +find_library(DETOURS_LIBRARY detours REQUIRED) + +add_library(snmallocdetours OBJECT override/detours.cc) + +target_include_directories(snmallocdetours PRIVATE ${DETOURS_INCLUDE_DIRS}) +target_link_libraries(snmallocdetours PUBLIC snmalloc) +target_link_libraries(snmallocdetours PRIVATE ${DETOURS_LIBRARY}) + +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +add_library(snmallocdetourslib SHARED override/lib.cc) +target_include_directories(snmallocdetourslib PRIVATE ${DETOURS_INCLUDE_DIRS}) +target_link_libraries(snmallocdetourslib PRIVATE snmallocdetours) + +add_executable(snmallocdetoursexample test/main.cc) +target_link_libraries(snmallocdetoursexample snmallocdetourslib) +# Check if 32 bit windows. 
+if (CMAKE_SIZEOF_VOID_P EQUAL 4) + target_link_options(snmallocdetoursexample PRIVATE "/INCLUDE:_is_snmalloc_detour") +else() + target_link_options(snmallocdetoursexample PRIVATE "/INCLUDE:is_snmalloc_detour") +endif() + +add_test(NAME snmallocdetours COMMAND snmallocdetoursexample) \ No newline at end of file diff --git a/src/windows/README.md b/src/windows/README.md new file mode 100644 index 000000000..eb99bdb9a --- /dev/null +++ b/src/windows/README.md @@ -0,0 +1,81 @@ +# Windows snmalloc override + +This directory contains an implementation of the snmalloc allocator that +overrides the default Windows allocator for the C-runtime (CRT). +The implementation uses the detours library to intercept calls to the CRT +and redirects them to snmalloc. + +# Building + +Build the cmake projects in this directory. From the root of the repository, do + +```bash +cmake -B build_detours -S src/windows +cmake --build build_detours +``` + +This will install the detours library using vcpkg and then build the snmalloc +static library, and a DLL that automatically loads snmalloc as the CRT allocator. + +# Using + +## DLL +To use the snmalloc allocator in your Windows application, you need to link against the `snmallocdetourslib.dll` that was built in the previous step. +You can do this by adding the following lines to your CMakeLists.txt: + +```cmake +target_link_libraries(your_target_name PRIVATE snmallocdetourslib) +target_link_options(your_target_name PRIVATE "/INCLUDE:is_snmalloc_detour") +``` + +The second line is necessary to ensure that the linker does not optimize away the DLL import. On 32-bit (Win32) builds the symbol is decorated, so use `/INCLUDE:_is_snmalloc_detour` instead. + + +## Static Library + +To use the snmalloc allocator as a static library, you can link against the `snmallocdetourslib.lib` file that was built in the previous step.
+You can do this by adding the following line to your CMakeLists.txt: + +```cmake +target_link_libraries(your_target_name PRIVATE snmallocdetourslib_static) +``` + +Then you need to cause the detours routine to be run by adding the following lines to a C++ source file: + +```cpp +#include "detours.h" +#pragma warning(push) +#pragma warning(disable : 4075) +#pragma init_seg(".CRT$XCV") +static SnmallocDetour snmalloc_detour; +#pragma warning(pop) +``` + +This causes the detours code to be run early in the program startup. + +## Locally scoped detour + +Finally, you can use the detour in a locally scoped manner. +This requires linking against the `snmallocdetourslib.lib`, and then can be used as: + +```cpp +#include "detours.h" + + +void my_function() +{ + SnmallocDetour snmalloc_detour; + // snmalloc is now the CRT allocator for this function + // ... +} // snmalloc_detour goes out of scope and the CRT allocator is restored +``` + +Upon exiting the scope of `snmalloc_detour`, the CRT allocator will be restored to its original state. + + +# Status + +This implementation is currently in an alpha state, and is not yet suitable for production use. + +Significant testing is required to ensure that it works for a range of applications. +If you successfully use this in your application, please let us know by commenting on #700. diff --git a/src/windows/override/detours.cc b/src/windows/override/detours.cc new file mode 100644 index 000000000..be159bb64 --- /dev/null +++ b/src/windows/override/detours.cc @@ -0,0 +1,156 @@ +/* + * This file provides a Windows-specific overriding of the malloc, + * free, calloc, realloc, and msize functions using the Detours library.
+ */ +#ifdef _DEBUG +# include +#endif + +#include "detours.h" + +#include +#include + +// Symbols for the original malloc, free, calloc, realloc, and msize functions +auto* original_malloc = malloc; +auto* original_calloc = calloc; +auto* original_realloc = realloc; +auto* original_free = free; +auto* original_msize = _msize; + +void* (*original_new)(size_t) = operator new; +void* (*original_new2)(size_t, const std::nothrow_t&) = operator new; + +void* (*original_new_array)(size_t) = operator new[]; +void* (*original_new_array2)(size_t, const std::nothrow_t&) = operator new[]; + +void (*original_delete)(void*) = operator delete; +void (*original_delete2)(void*, size_t) = operator delete; +void (*original_delete3)(void*, const std::nothrow_t&) = operator delete; + +void (*original_delete_array)(void*) = operator delete[]; +void (*original_delete_array2)(void*, size_t) = operator delete[]; + +#include +// Provides the global configuration for the snmalloc implementation. +#include + +#define SNMALLOC_PROVIDE_OWN_CONFIG + +namespace snmalloc +{ + class WindowsHeapAsSecondaryAllocator + { + public: + // This flag is used to turn off checks on fast paths if the secondary + // allocator does not own the memory at all. + static constexpr inline bool pass_through = false; + + SNMALLOC_FAST_PATH + static void initialize() {} + + // We always use snmalloc for allocation. + template + SNMALLOC_FAST_PATH static void* allocate(SizeAlign&&) + { + return nullptr; + } + + // If the memory was not deallocated by snmalloc, then try the + // original free. + SNMALLOC_FAST_PATH + static void deallocate(void* pointer) + { + if (pointer == nullptr) + return; + + original_free(pointer); + } + + SNMALLOC_FAST_PATH + static size_t alloc_size(const void* p) + { + return original_msize(const_cast(p)); + } + }; + + // Route failed deallocations and msize requests to the Windows heap.
+ using Config = snmalloc::StandardConfigClientMeta< + NoClientMetaDataProvider, + WindowsHeapAsSecondaryAllocator>; + using Alloc = snmalloc::Allocator; +} // namespace snmalloc + +#define SNMALLOC_STATIC_LIBRARY_PREFIX snmalloc_ +#include "detours/detours.h" + +#include +#include +#include + +// This name is not provided by malloc.cc above, so we define it here. +size_t snmalloc_msize(void* ptr) +{ + // Call the snmalloc function to get the allocation size. + // This is not accurate as it rounds up, whereas the original msize + // function returns the exact size of the allocation. + return snmalloc::alloc_size(ptr); +} + +SnmallocDetour::SnmallocDetour() +{ + // Initialize snmalloc. + snmalloc_free(snmalloc_malloc(1)); + + DetourTransactionBegin(); + DetourAttach(&(PVOID&)original_free, snmalloc_free); + DetourAttach(&(PVOID&)original_delete, snmalloc_free); + DetourAttach(&(PVOID&)original_delete2, snmalloc_free); + DetourAttach(&(PVOID&)original_delete3, snmalloc_free); + DetourAttach(&(PVOID&)original_delete_array, snmalloc_free); + DetourAttach(&(PVOID&)original_delete_array2, snmalloc_free); + DetourAttach(&(PVOID&)original_malloc, snmalloc_malloc); + DetourAttach(&(PVOID&)original_calloc, snmalloc_calloc); + DetourAttach(&(PVOID&)original_realloc, snmalloc_realloc); + DetourAttach(&(PVOID&)original_msize, snmalloc_msize); + DetourAttach(&(PVOID&)original_new, snmalloc_malloc); + DetourAttach(&(PVOID&)original_new2, snmalloc_malloc); + DetourAttach(&(PVOID&)original_new_array, snmalloc_malloc); + DetourAttach(&(PVOID&)original_new_array2, snmalloc_malloc); + + DetourTransactionCommit(); +} + +SnmallocDetour::~SnmallocDetour() +{ + // Detours performs allocation so during this some data structures will + // be allocated with snmalloc. These cannot be handled by the Windows heap + // so leave snmalloc::free in place to handle these allocations.
+ + DetourTransactionBegin(); + DetourDetach(&(PVOID&)original_calloc, snmalloc_calloc); + DetourDetach(&(PVOID&)original_realloc, snmalloc_realloc); + DetourDetach(&(PVOID&)original_malloc, snmalloc_malloc); + DetourDetach(&(PVOID&)original_msize, snmalloc_msize); + DetourDetach(&(PVOID&)original_new, snmalloc_malloc); + DetourDetach(&(PVOID&)original_new2, snmalloc_malloc); + DetourDetach(&(PVOID&)original_new_array, snmalloc_malloc); + DetourDetach(&(PVOID&)original_new_array2, snmalloc_malloc); + DetourTransactionCommit(); + + // This transaction's allocation will come from the Windows heap, so it is + // safe to use the Windows heap's free during teardown. Every free/delete hook attached by the constructor is detached here. + DetourTransactionBegin(); + DetourDetach(&(PVOID&)original_free, snmalloc_free); + DetourDetach(&(PVOID&)original_delete, snmalloc_free); + DetourDetach(&(PVOID&)original_delete2, snmalloc_free); + DetourDetach(&(PVOID&)original_delete3, snmalloc_free); + DetourDetach(&(PVOID&)original_delete_array, snmalloc_free); + DetourDetach(&(PVOID&)original_delete_array2, snmalloc_free); + DetourTransactionCommit(); +} + +extern "C" bool is_snmalloc_detour(void* ptr) +{ + return snmalloc::is_owned(ptr); +} \ No newline at end of file diff --git a/src/windows/override/detours.h b/src/windows/override/detours.h new file mode 100644 index 000000000..7479106b1 --- /dev/null +++ b/src/windows/override/detours.h @@ -0,0 +1,11 @@ +#pragma once + +class SnmallocDetour +{ +public: + SnmallocDetour(); + ~SnmallocDetour(); +}; + +// Used to check that a pointer is from the snmalloc detour.
+extern "C" __declspec(dllexport) bool is_snmalloc_detour(void* ptr); \ No newline at end of file diff --git a/src/windows/override/lib.cc b/src/windows/override/lib.cc new file mode 100644 index 000000000..54bd90636 --- /dev/null +++ b/src/windows/override/lib.cc @@ -0,0 +1,11 @@ +#include "detours.h" + +#pragma warning(push) +#pragma warning(disable : 4075) +// This pragma uses a segment that is alphabetically later than the +// one used in pal_windows.h. This is required to ensure that the +// global function pointers have been initialized before we attempt to +// detour them. +#pragma init_seg(".CRT$XCV") +static SnmallocDetour snmalloc_detour; +#pragma warning(pop) \ No newline at end of file diff --git a/src/windows/test/main.cc b/src/windows/test/main.cc new file mode 100644 index 000000000..b3f39e69f --- /dev/null +++ b/src/windows/test/main.cc @@ -0,0 +1,22 @@ +#include "../override/detours.h" + +#include +#include + +// This is done in CMake, but could be done here as well +// #pragma comment(linker, "/include:is_snmalloc_detour") + +int main() +{ + auto p_old = malloc(16); + + if (p_old != nullptr && !is_snmalloc_detour(p_old)) + { + printf("Detouring malloc and free failed...\n"); + return 1; // non-zero exit so the registered test actually fails + } + free(p_old); + + printf("Test passed: Detouring malloc and free succeeded.\n"); + return 0; +} \ No newline at end of file diff --git a/src/windows/vcpkg-configuration.json b/src/windows/vcpkg-configuration.json new file mode 100644 index 000000000..b02b07797 --- /dev/null +++ b/src/windows/vcpkg-configuration.json @@ -0,0 +1,14 @@ +{ + "default-registry": { + "kind": "git", + "baseline": "1a66c32c6f90c2f646529975c3c076ed3dbdae0c", + "repository": "https://github.com/microsoft/vcpkg" + }, + "registries": [ + { + "kind": "artifact", + "location": "https://github.com/microsoft/vcpkg-ce-catalog/archive/refs/heads/main.zip", + "name": "microsoft" + } + ] +} diff --git a/src/windows/vcpkg.json b/src/windows/vcpkg.json new file mode 100644 index 000000000..f6217a27c ---
/dev/null +++ b/src/windows/vcpkg.json @@ -0,0 +1,5 @@ +{ + "dependencies": [ + "detours" + ] +} \ No newline at end of file