Skip to content

Commit 50c3ca7

Browse files
committed
Update threadpool size options, add CMake options
1 parent e89aa3e commit 50c3ca7

File tree

6 files changed

+89
-36
lines changed

6 files changed

+89
-36
lines changed

extension/threadpool/CMakeLists.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ if(NOT CMAKE_CXX_STANDARD)
2020
set(CMAKE_CXX_STANDARD 17)
2121
endif()
2222

23+
# Threadpool size specifiers. Mutual exclusion is checked in default.cmake.
24+
# Default to using performance cores if
25+
# EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES isn't set.
26+
set(_threadpool_size_flag)
27+
if(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES)
28+
set(_threadpool_size_flag "EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES")
29+
else()
30+
set(_threadpool_size_flag "EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES")
31+
endif()
32+
2333
add_library(
2434
extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp
2535
cpuinfo_utils.cpp
@@ -36,7 +46,9 @@ target_include_directories(
3646
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include>
3747
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include>
3848
)
39-
target_compile_definitions(extension_threadpool PUBLIC ET_USE_THREADPOOL)
49+
# Export ET_USE_THREADPOOL together with the threadpool size mode chosen in
# _threadpool_size_flag earlier in this file, so that threadpool.cpp (and
# dependents, since the definitions are PUBLIC) see the selected mode.
target_compile_definitions(
50+
extension_threadpool PUBLIC ET_USE_THREADPOOL ${_threadpool_size_flag}
51+
)
4052
target_compile_options(extension_threadpool PUBLIC ${_common_compile_options})
4153

4254
# Install libraries

extension/threadpool/targets.bzl

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ def define_common_targets():
2222
name = "threadpool_lib",
2323
srcs = _THREADPOOL_SRCS,
2424
deps = [
25+
":cpuinfo_utils",
2526
"//executorch/runtime/core:core",
2627
"//executorch/runtime/core/portable_type/c10/c10:c10",
27-
":cpuinfo_utils",
2828
],
2929
exported_headers = _THREADPOOL_HEADERS,
3030
exported_deps = [
@@ -35,10 +35,7 @@ def define_common_targets():
3535
],
3636
exported_preprocessor_flags = [
3737
"-DET_USE_THREADPOOL",
38-
] + (
39-
# Default to perf heuristic (0) in OSS, all cores (-1) otherwise.
40-
["-DEXECUTORCH_THREADPOOL_SIZE=0"] if runtime.is_oss else ["-DEXECUTORCH_THREADPOOL_SIZE=-1"]
41-
),
38+
],
4239
visibility = [
4340
"//executorch/...",
4441
"//executorch/backends/...",

extension/threadpool/test/threadpool_test.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <executorch/extension/threadpool/threadpool.h>
10+
#include <executorch/runtime/platform/runtime.h>
1011

1112
#include <mutex>
1213
#include <numeric>
@@ -71,6 +72,8 @@ void run_lambda_with_size(
7172
} // namespace
7273

7374
TEST(ThreadPoolTest, ParallelAdd) {
75+
executorch::runtime::runtime_init();
76+
7477
std::vector<int32_t> a, b, c, c_ref;
7578
size_t vector_size = 100;
7679
size_t grain_size = 10;
@@ -111,6 +114,8 @@ TEST(ThreadPoolTest, ParallelAdd) {
111114

112115
// Test parallel reduction where we acquire lock within lambda
113116
TEST(ThreadPoolTest, ParallelReduce) {
117+
executorch::runtime::runtime_init();
118+
114119
std::vector<int32_t> a;
115120
int32_t c = 0, c_ref = 0;
116121
size_t vector_size = 100;
@@ -144,6 +149,8 @@ TEST(ThreadPoolTest, ParallelReduce) {
144149
// Copied from
145150
// caffe2/aten/src/ATen/test/test_thread_pool_guard.cpp
146151
TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
152+
executorch::runtime::runtime_init();
153+
147154
auto threadpool_ptr = ::executorch::extension::threadpool::get_pthreadpool();
148155

149156
ASSERT_NE(threadpool_ptr, nullptr);
@@ -173,6 +180,8 @@ TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
173180
}
174181

175182
TEST(TestNoThreadPoolGuard, TestRunWithGuard) {
183+
executorch::runtime::runtime_init();
184+
176185
const std::vector<int64_t> array = {1, 2, 3};
177186

178187
auto pool = ::executorch::extension::threadpool::get_threadpool();

extension/threadpool/threadpool.cpp

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,26 @@
1515

1616
#include <executorch/extension/threadpool/threadpool_guard.h>
1717
#include <executorch/runtime/platform/assert.h>
18+
#include <executorch/runtime/platform/runtime.h>
1819

1920
#include <cpuinfo.h>
2021

22+
// At most one mode should be set.
23+
// Fail the build when both size-mode macros are defined at once; the
// selection logic below expects at most one of them.
#if ( \
24+
defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES) && \
25+
defined(EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES))
26+
#error Multiple \
27+
threadpool size specifiers are set. At most one of \
28+
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES \
29+
and EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES may be defined.
30+
#endif
31+
32+
// Default to EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES if no mode is set.
33+
#if !defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES) && \
34+
!defined(EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES)
35+
#define EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES 1
36+
#endif
37+
2138
namespace executorch::extension::threadpool {
2239

2340
#if !(defined(WIN32))
@@ -97,24 +114,22 @@ void ThreadPool::run(
97114
// get_threadpool is not thread safe due to leak_corrupted_threadpool
98115
// Make this part threadsafe: TODO(kimishpatel)
99116
ThreadPool* get_threadpool() {
117+
executorch::runtime::runtime_init();
118+
100119
if (!cpuinfo_initialize()) {
101120
ET_LOG(Error, "cpuinfo initialization failed");
102121
return nullptr; // NOLINT(facebook-hte-NullableReturn)
103122
}
104123

105-
// Choose the number of threads according to the EXECUTORCH_THREADPOOL_SIZE
106-
// value. See the description in threadpool.h.
124+
// Choose the number of threads according to the EXECUTORCH_THREADPOOL_
125+
// options. See the description in threadpool.h.
107126

108-
#if defined(EXECUTORCH_THREADPOOL_SIZE) && ((EXECUTORCH_THREADPOOL_SIZE) > 0)
109-
// Use an explicit threadpool size.
110-
int num_threads = EXECUTORCH_THREADPOOL_SIZE;
111-
#elif defined(EXECUTORCH_THREADPOOL_SIZE) && \
112-
((EXECUTORCH_THREADPOOL_SIZE) == -1)
127+
#if defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES)
113128
// Use threads=cores.
114-
int num_threads = cpuinfo_get_processors_count();
129+
static int num_threads = cpuinfo_get_processors_count();
115130
#else
116-
// Use a performance heuristic.
117-
int num_threads =
131+
// Set threads equal to the number of performance cores.
132+
static int num_threads =
118133
::executorch::extension::cpuinfo::get_num_performant_cores();
119134
#endif
120135

extension/threadpool/threadpool.h

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,19 @@
1717
/*
1818
* Threadpool Options:
1919
*
20-
* Threadpool size has a sizble affect on performance. The following
21-
* options are exposed to control the threadpool size.
20+
* Threadpool size has a sizable effect on performance. By default, the
21+
* threadpool will be sized according to the number of performance cores. This
22+
* behavior can be overridden with the following build-time options. Note that
23+
* these options are mutually exclusive.
2224
*
23-
* EXECUTORCH_THREADPOOL_SIZE: int - Set the size of the threadpool,
24-
* in number of threads.
25-
*
26-
* Special Values:
27-
* - 0: Use a perforance heuristic to determine the default size,
28-
* based on the active hardware. This is the default mode
29-
* for CMake.
30-
* - -1: Set the thread count equal to the number of cores on the
31-
* active hardware.
32-
*
33-
* Any other positive value will be interpreted as a thread count.
34-
* For example, setting EXECUTORCH_THREADPOOL_SIZE=4 will default
35-
* the threadpool to use 4 threads.
25+
* - EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES (flag) - Sizes the threadpool
26+
* equal to the number of performance cores on the system. This is the default
27+
* behavior.
28+
* - EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES (flag) - Sizes the threadpool
29+
* equal to the number of logical cores on the system. This is the historical
30+
* behavior.
3631
*/
3732

38-
#ifndef EXECUTORCH_THREADPOOL_SIZE
39-
// Default to using a runtime heuristic.
40-
#define EXECUTORCH_THREADPOOL_SIZE 0
41-
#endif
42-
4333
namespace executorch::extension::threadpool {
4434

4535
class ThreadPool final {

tools/cmake/preset/default.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,36 @@ define_overridable_option(
176176
${_default_executorch_build_cpuinfo}
177177
)
178178

179+
# Threadpool size options. At most one can be specified. Note that the default
180+
# is managed in threadpool.cpp to allow the user to specify an alternate mode
181+
# without needing to explicitly set the default to off.
182+
define_overridable_option(
183+
EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES
184+
"Set the number of threads used for CPU parallel computation equal to the number of performant CPU cores."
185+
BOOL
186+
OFF
187+
)
188+
define_overridable_option(
189+
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES
190+
"Set the number of threads used for CPU parallel computation equal to the number of logical CPU cores."
191+
BOOL
192+
OFF
193+
)
194+
195+
check_required_options_on(
196+
IF_ON EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES REQUIRES
197+
EXECUTORCH_BUILD_PTHREADPOOL EXECUTORCH_BUILD_CPUINFO
198+
)
199+
check_required_options_on(
200+
IF_ON EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES REQUIRES
201+
EXECUTORCH_BUILD_PTHREADPOOL EXECUTORCH_BUILD_CPUINFO
202+
)
203+
204+
check_conflicting_options_on(
205+
IF_ON EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES CONFLICTS_WITH
206+
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES
207+
)
208+
179209
# TODO(jathu): move this to platform specific presets when created
180210
set(_default_executorch_build_executor_runner ON)
181211
if(APPLE AND "${SDK_NAME}" STREQUAL "iphoneos")

0 commit comments

Comments
 (0)