From a696b44e73159fe5d82e0e5700acf5cdc3a00250 Mon Sep 17 00:00:00 2001 From: Jeff Whiteside Date: Fri, 6 Sep 2024 17:00:31 -0700 Subject: [PATCH 1/5] [Windows Build] Implement MMAP for mmap_data_loader.cpp There is no sys/mman.h or posix-compatible mmap() implementation on Windows. The extension data loaders use it to map in data files, so adding an implementation. Test-run, & .\cmake-out\extension\data_loader\test\Debug\extension_data_loader_test.exe --gtest_brief=1 --gtest_filter=MmapDataLoader* Running main() from ...\executorch\third-party\googletest\googletest\src\gtest_main.cc [==========] 8 tests from 1 test suite ran. (50 ms total) [ PASSED ] 8 tests. --- extension/data_loader/mman.h | 41 +++ extension/data_loader/mman_windows.cpp | 251 ++++++++++++++++++ extension/data_loader/mman_windows.h | 71 +++++ extension/data_loader/mmap_data_loader.cpp | 23 +- extension/data_loader/targets.bzl | 11 +- .../test/mmap_data_loader_test.cpp | 5 +- 6 files changed, 393 insertions(+), 9 deletions(-) create mode 100644 extension/data_loader/mman.h create mode 100644 extension/data_loader/mman_windows.cpp create mode 100644 extension/data_loader/mman_windows.h diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h new file mode 100644 index 00000000000..6f0a9b21064 --- /dev/null +++ b/extension/data_loader/mman.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +// This file ensures that mman.h compatible functions are defined in the global +// namespace for windows and posix environments. 
+ +#pragma once + +#include + +#ifndef _WIN32 + +#include +#include + +ET_INLINE long get_os_page_size(){return sysconf(_SC_PAGESIZE)} + +#else + +#define NOMINMAX +#include +#undef NOMINMAX +#include + +#include + +ET_INLINE long get_os_page_size() { + SYSTEM_INFO si; + GetSystemInfo(&si); + long pagesize = si.dwAllocationGranularity > si.dwPageSize + ? si.dwAllocationGranularity + : si.dwPageSize; + return pagesize; +} + +#endif diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp new file mode 100644 index 00000000000..d5f4f136e22 --- /dev/null +++ b/extension/data_loader/mman_windows.cpp @@ -0,0 +1,251 @@ +/* + * Copyright (c) Google Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the MIT license. + */ + +/* + * Adapted from: https://code.google.com/archive/p/mman-win32/ + * + * mman-win32 + * mman library for Windows + * + * A light implementation of the mmap functions for MinGW. + * + * The mmap-win32 library implements a wrapper for mmap functions around the + * memory mapping Windows API. 
+ */ + +#include +#include +#include + +#include "mman_windows.h" + +#ifndef STATUS_SECTION_TOO_BIG +#define STATUS_SECTION_TOO_BIG ((NTSTATUS)0xC0000040L) +#endif + +#ifndef FILE_MAP_EXECUTE +#define FILE_MAP_EXECUTE 0x0020 +#endif /* FILE_MAP_EXECUTE */ + +#define RETURN_IF_FAILED(hr) \ + do { \ + if (FAILED((hr))) { \ + return hr; \ + } \ + } while (false) + +namespace { + +HRESULT try_grow_process_memory_working_set(DWORD dwSizeRequired) { + // Get current working set + size_t minWorkingSetInitial; + size_t maxWorkingSet; + if (!GetProcessWorkingSetSize( + GetCurrentProcess(), &minWorkingSetInitial, &maxWorkingSet)) { + return GetLastError(); + } + + // Calculate new sizes + size_t minWorkingSet = minWorkingSetInitial + dwSizeRequired; + if (minWorkingSet < minWorkingSetInitial) { + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + } + + if (maxWorkingSet < minWorkingSet) { + maxWorkingSet = minWorkingSet; + } + + // Grow working set + if (!SetProcessWorkingSetSize( + GetCurrentProcess(), minWorkingSet, maxWorkingSet)) { + return GetLastError(); + } + return S_OK; +} + +HRESULT virtual_lock(void* pMem, DWORD dwSize) { + if (!VirtualLock(pMem, dwSize)) { + return GetLastError(); + } + return S_OK; +} + +HRESULT virtual_lock_allowing_working_set_growth(void* pMem, DWORD dwSize) { + HRESULT hr = virtual_lock(pMem, dwSize); + + if (hr == HRESULT_FROM_WIN32(STATUS_SECTION_TOO_BIG)) { + // Attempt to grow the process working set and try again + RETURN_IF_FAILED(try_grow_process_memory_working_set(dwSize)); + RETURN_IF_FAILED(virtual_lock(pMem, dwSize)); + } + + return hr; +} + +static int __map_mman_error(const DWORD err, const int deferr) { + if (err == 0) { + return 0; + } + // TODO: implement + return err; +} + +static DWORD __map_mmap_prot_page(const int prot) { + DWORD protect = 0; + + if (prot == PROT_NONE) { + return protect; + } + if ((prot & PROT_EXEC) != 0) { + protect = + ((prot & PROT_WRITE) != 0) ? 
PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ; + } else { + protect = ((prot & PROT_WRITE) != 0) ? PAGE_READWRITE : PAGE_READONLY; + } + return protect; +} + +static DWORD __map_mmap_prot_file(const int prot) { + DWORD desiredAccess = 0; + + if (prot == PROT_NONE) { + return desiredAccess; + } + if ((prot & PROT_READ) != 0) { + desiredAccess |= FILE_MAP_READ; + } + if ((prot & PROT_WRITE) != 0) { + desiredAccess |= FILE_MAP_WRITE; + } + if ((prot & PROT_EXEC) != 0) { + desiredAccess |= FILE_MAP_EXECUTE; + } + return desiredAccess; +} + +} // namespace + +void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off) { + HANDLE fm, h; + + void* map = MAP_FAILED; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4293) +#endif + + const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) + ? (DWORD)off + : (DWORD)(off & 0xFFFFFFFFL); + const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) + ? (DWORD)0 + : (DWORD)((off >> 32) & 0xFFFFFFFFL); + const DWORD protect = __map_mmap_prot_page(prot); + const DWORD desiredAccess = __map_mmap_prot_file(prot); + + const off_t maxSize = off + (off_t)len; + + const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) + ? (DWORD)maxSize + : (DWORD)(maxSize & 0xFFFFFFFFL); + const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) + ? (DWORD)0 + : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL); + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + errno = 0; + + if (len == 0 + /* Unsupported flag combinations */ + || (flags & MAP_FIXED) != 0 + /* Unsupported protection combinations */ + || prot == PROT_EXEC) { + errno = EINVAL; + return MAP_FAILED; + } + + h = ((flags & MAP_ANONYMOUS) == 0) ? 
(HANDLE)_get_osfhandle(fildes) + : INVALID_HANDLE_VALUE; + + if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) { + errno = EBADF; + return MAP_FAILED; + } + + fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL); + + if (fm == NULL) { + errno = __map_mman_error(GetLastError(), EPERM); + return MAP_FAILED; + } + + map = + MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len); + + CloseHandle(fm); + + if (map == NULL) { + errno = __map_mman_error(GetLastError(), EPERM); + return MAP_FAILED; + } + + return map; +} + +int munmap(void* addr, size_t len) { + if (UnmapViewOfFile(addr)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int mprotect(void* addr, size_t len, int prot) { + DWORD newProtect = __map_mmap_prot_page(prot); + DWORD oldProtect = 0; + + if (VirtualProtect(addr, len, newProtect, &oldProtect)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int msync(void* addr, size_t len, int flags) { + if (FlushViewOfFile(addr, len)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int mlock(const void* addr, size_t len) { + HRESULT hr = virtual_lock_allowing_working_set_growth((LPVOID)addr, len); + if (SUCCEEDED(hr)) { + return 0; + } + + errno = __map_mman_error(hr, EPERM); + + return -1; +} + +int munlock(const void* addr, size_t len) { + if (VirtualUnlock((LPVOID)addr, len)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} diff --git a/extension/data_loader/mman_windows.h b/extension/data_loader/mman_windows.h new file mode 100644 index 00000000000..4c247cd4fca --- /dev/null +++ b/extension/data_loader/mman_windows.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) Google Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the MIT license. 
+ */ + +/* + * Adapted from: https://code.google.com/archive/p/mman-win32/ + * + * mman-win32 + * mman library for Windows + * + * A light implementation of the mmap functions for MinGW. + * + * The mmap-win32 library implements a wrapper for mmap functions around the + * memory mapping Windows API. + */ + +#ifndef _SYS_MMAN_H_ +#define _SYS_MMAN_H_ + +#ifndef _WIN32_WINNT // Allow use of features specific to Windows XP or later. +#define _WIN32_WINNT \ + 0x0501 // Change this to the appropriate value to target other versions of + // Windows. +#endif + +/* All the headers include this file. */ +#ifndef _MSC_VER +#include <_mingw.h> +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define PROT_NONE 0 +#define PROT_READ 1 +#define PROT_WRITE 2 +#define PROT_EXEC 4 + +#define MAP_FILE 0 +#define MAP_SHARED 1 +#define MAP_PRIVATE 2 +#define MAP_TYPE 0xf +#define MAP_FIXED 0x10 +#define MAP_ANONYMOUS 0x20 +#define MAP_ANON MAP_ANONYMOUS + +#define MAP_FAILED ((void*)-1) + +/* Flags for msync. */ +#define MS_ASYNC 1 +#define MS_SYNC 2 +#define MS_INVALIDATE 4 + +void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off); +int munmap(void* addr, size_t len); +int mprotect(void* addr, size_t len, int prot); +int msync(void* addr, size_t len, int flags); +int mlock(const void* addr, size_t len); +int munlock(const void* addr, size_t len); + +#ifdef __cplusplus +}; +#endif + +#endif /* _SYS_MMAN_H_ */ diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index e990117d586..a2d365faa0c 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -13,10 +14,8 @@ #include #include -#include #include #include -#include #include #include @@ -63,14 +62,16 @@ MmapDataLoader::~MmapDataLoader() { std::free(const_cast(file_name_)); // fd_ can be -1 if this instance was moved from, but closing a negative fd is // safe (though it will return an error). - ::close(fd_); + if (fd_ != -1) { + ::close(fd_); + } } Result MmapDataLoader::from( const char* file_name, MmapDataLoader::MlockConfig mlock_config) { // Cache the page size. - long page_size = sysconf(_SC_PAGESIZE); + long page_size = get_os_page_size(); if (page_size < 0) { ET_LOG(Error, "Could not get page size: %s (%d)", ::strerror(errno), errno); return Error::AccessFailed; @@ -182,12 +183,24 @@ Result MmapDataLoader::load( Range range = get_overlapping_pages(static_cast(offset), size, page_size_); + size_t map_size = range.size; +#ifdef _WIN32 + // On Windows, don't mmap-in memory past end of on-disk file. + // + // The Windows implementation of mmap uses CreateFileMapping which returns + // error STATUS_SECTION_TOO_BIG (0xc0000040) if we try to map past the end + // of the last page of a file mapped in as read-only. + if (range.start + range.size > file_size_) { + map_size = file_size_ - range.start; + } +#endif + // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since // the data is read-only, but use PRIVATE just to further avoid accidentally // modifying the file. 
void* pages = ::mmap( nullptr, - range.size, + map_size, PROT_READ, MAP_PRIVATE, fd_, diff --git a/extension/data_loader/targets.bzl b/extension/data_loader/targets.bzl index fcc7cba5419..1a2b77cfaeb 100644 --- a/extension/data_loader/targets.bzl +++ b/extension/data_loader/targets.bzl @@ -69,7 +69,16 @@ def define_common_targets(): runtime.cxx_library( name = "mmap_data_loader", - srcs = ["mmap_data_loader.cpp"], + srcs = [ + "mmap_data_loader.cpp", + "mman_windows.cpp" + ] if host_info().os.is_windows else [ + "mmap_data_loader.cpp" + ], + headers = [ + "mman.h", + "mman_windows.h" + ], exported_headers = ["mmap_data_loader.h"], visibility = [ "//executorch/test/...", diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index a76121109a8..b217705c618 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -6,12 +6,11 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include -#include - #include #include @@ -34,7 +33,7 @@ class MmapDataLoaderTest : public ::testing::Test { executorch::runtime::runtime_init(); // Get the page size and ensure it's a power of 2. 
- long page_size = sysconf(_SC_PAGESIZE); + long page_size = get_os_page_size(); ASSERT_GT(page_size, 0); ASSERT_EQ(page_size & ~(page_size - 1), page_size); page_size_ = page_size; From 197e198cbf2aee1286e2859807152a61109cf890 Mon Sep 17 00:00:00 2001 From: Sam Gondelman Date: Mon, 3 Mar 2025 19:49:49 -0800 Subject: [PATCH 2/5] apply code suggestions --- extension/data_loader/mman.h | 4 +++- extension/data_loader/mman_windows.cpp | 4 ++-- extension/data_loader/mman_windows.h | 5 +---- extension/data_loader/mmap_data_loader.cpp | 16 +++++++--------- extension/data_loader/targets.bzl | 12 ++++-------- .../data_loader/test/mmap_data_loader_test.cpp | 2 +- 6 files changed, 18 insertions(+), 25 deletions(-) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 6f0a9b21064..246068986ea 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -18,7 +18,9 @@ #include #include -ET_INLINE long get_os_page_size(){return sysconf(_SC_PAGESIZE)} +ET_INLINE size_t get_os_page_size() { + return sysconf(_SC_PAGESIZE); +} #else diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp index d5f4f136e22..2a7f462f99c 100644 --- a/extension/data_loader/mman_windows.cpp +++ b/extension/data_loader/mman_windows.cpp @@ -17,12 +17,12 @@ * memory mapping Windows API. */ +#include + #include #include #include -#include "mman_windows.h" - #ifndef STATUS_SECTION_TOO_BIG #define STATUS_SECTION_TOO_BIG ((NTSTATUS)0xC0000040L) #endif diff --git a/extension/data_loader/mman_windows.h b/extension/data_loader/mman_windows.h index 4c247cd4fca..563db5d8b21 100644 --- a/extension/data_loader/mman_windows.h +++ b/extension/data_loader/mman_windows.h @@ -17,8 +17,7 @@ * memory mapping Windows API. */ -#ifndef _SYS_MMAN_H_ -#define _SYS_MMAN_H_ +#pragma once #ifndef _WIN32_WINNT // Allow use of features specific to Windows XP or later. 
#define _WIN32_WINNT \ @@ -67,5 +66,3 @@ int munlock(const void* addr, size_t len); #ifdef __cplusplus }; #endif - -#endif /* _SYS_MMAN_H_ */ diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index a2d365faa0c..53fd7bdf624 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -6,7 +6,6 @@ * LICENSE file in the root directory of this source tree. */ -#include #include #include @@ -17,6 +16,7 @@ #include #include +#include #include #include #include @@ -184,16 +184,14 @@ Result MmapDataLoader::load( get_overlapping_pages(static_cast(offset), size, page_size_); size_t map_size = range.size; -#ifdef _WIN32 - // On Windows, don't mmap-in memory past end of on-disk file. - // - // The Windows implementation of mmap uses CreateFileMapping which returns - // error STATUS_SECTION_TOO_BIG (0xc0000040) if we try to map past the end - // of the last page of a file mapped in as read-only. - if (range.start + range.size > file_size_) { + if (range.start + map_size > file_size_) { + // Clamp to the end of the file. + // + // The Windows implementation of mmap uses CreateFileMapping which returns + // error STATUS_SECTION_TOO_BIG (0xc0000040) if we try to map past the end + // of the last page of a file mapped in as read-only. map_size = file_size_ - range.start; } -#endif // Map the pages read-only. MAP_PRIVATE vs. 
MAP_SHARED doesn't matter since // the data is read-only, but use PRIVATE just to further avoid accidentally diff --git a/extension/data_loader/targets.bzl b/extension/data_loader/targets.bzl index 1a2b77cfaeb..9e0f95d453c 100644 --- a/extension/data_loader/targets.bzl +++ b/extension/data_loader/targets.bzl @@ -69,16 +69,12 @@ def define_common_targets(): runtime.cxx_library( name = "mmap_data_loader", - srcs = [ - "mmap_data_loader.cpp", - "mman_windows.cpp" - ] if host_info().os.is_windows else [ + src = [ "mmap_data_loader.cpp" - ], + ] + ["mman_windows.cpp"] if host_info().os.is_windows else [], headers = [ - "mman.h", - "mman_windows.h" - ], + "mman.h" + ] + ["mman_windows.h"] if host_info().os.is_windows else [], exported_headers = ["mmap_data_loader.h"], visibility = [ "//executorch/test/...", diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index b217705c618..c01b3454493 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -6,13 +6,13 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include #include #include #include +#include #include #include #include From aac6b5dfb26b11fdcf79f2ca8d7b0b2b5695c39e Mon Sep 17 00:00:00 2001 From: Sam Gondelman Date: Tue, 4 Mar 2025 21:00:47 -0800 Subject: [PATCH 3/5] fix src -> srcs typo --- extension/data_loader/targets.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/data_loader/targets.bzl b/extension/data_loader/targets.bzl index 9e0f95d453c..c00dd20d0ef 100644 --- a/extension/data_loader/targets.bzl +++ b/extension/data_loader/targets.bzl @@ -69,7 +69,7 @@ def define_common_targets(): runtime.cxx_library( name = "mmap_data_loader", - src = [ + srcs = [ "mmap_data_loader.cpp" ] + ["mman_windows.cpp"] if host_info().os.is_windows else [], headers = [ From 5c4e93fb3305ce92e50a26a7d0cfe9bb5c8196b9 Mon Sep 17 00:00:00 2001 From: Sam Gondelman Date: Tue, 4 Mar 2025 23:27:34 -0800 Subject: [PATCH 4/5] try to fix build --- extension/data_loader/targets.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extension/data_loader/targets.bzl b/extension/data_loader/targets.bzl index c00dd20d0ef..50ac8a6339b 100644 --- a/extension/data_loader/targets.bzl +++ b/extension/data_loader/targets.bzl @@ -72,10 +72,10 @@ def define_common_targets(): srcs = [ "mmap_data_loader.cpp" ] + ["mman_windows.cpp"] if host_info().os.is_windows else [], - headers = [ - "mman.h" + exported_headers = [ + "mman.h", + "mmap_data_loader.h", ] + ["mman_windows.h"] if host_info().os.is_windows else [], - exported_headers = ["mmap_data_loader.h"], visibility = [ "//executorch/test/...", "//executorch/extension/pybindings/...", From 4e6b9730ebd387b2a5c04881e4044a1adcc4bc8d Mon Sep 17 00:00:00 2001 From: Sam Gondelman Date: Wed, 5 Mar 2025 13:19:22 -0800 Subject: [PATCH 5/5] fix src/headers brackets --- extension/data_loader/targets.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extension/data_loader/targets.bzl b/extension/data_loader/targets.bzl index 
50ac8a6339b..c23e4536520 100644 --- a/extension/data_loader/targets.bzl +++ b/extension/data_loader/targets.bzl @@ -71,11 +71,11 @@ def define_common_targets(): name = "mmap_data_loader", srcs = [ "mmap_data_loader.cpp" - ] + ["mman_windows.cpp"] if host_info().os.is_windows else [], - exported_headers = [ + ] + (["mman_windows.cpp"] if host_info().os.is_windows else []), + headers = [ "mman.h", - "mmap_data_loader.h", - ] + ["mman_windows.h"] if host_info().os.is_windows else [], + ] + (["mman_windows.h"] if host_info().os.is_windows else []), + exported_headers = ["mmap_data_loader.h"], visibility = [ "//executorch/test/...", "//executorch/extension/pybindings/...",