From 70a6c547fa4b7cb636f859a1bf0011e44636aed3 Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 12:21:12 +0200 Subject: [PATCH 1/9] Initial commit --- examples/dolly-v2/main.cpp | 2 + examples/dolly-v2/quantize.cpp | 2 +- examples/gpt-2/main.cpp | 2 + examples/gpt-2/quantize.cpp | 2 +- examples/gpt-j/main.cpp | 2 + examples/gpt-j/quantize.cpp | 2 +- examples/gpt-neox/main.cpp | 2 + examples/gpt-neox/quantize.cpp | 2 +- examples/mnist/main-cpu.cpp | 1 + examples/mnist/main-mtl.cpp | 1 + examples/mnist/main.cpp | 2 + examples/mpt/main.cpp | 2 + examples/mpt/quantize.cpp | 2 +- examples/replit/main.cpp | 2 + examples/replit/quantize.cpp | 2 +- examples/starcoder/main.cpp | 2 + examples/starcoder/quantize.cpp | 2 +- examples/starcoder/starcoder-mmap.cpp | 3 + examples/whisper/quantize.cpp | 2 +- examples/whisper/whisper.cpp | 6 + include/ggml/ggml.h | 3 +- include/ggml/pthreads.h | 448 ++++++++++++++++++++++++++ src/ggml.c | 248 +++++--------- tests/test-blas0.c | 1 + tests/test-grad0.c | 1 + tests/test-mul-mat0.c | 1 + tests/test-mul-mat2.c | 2 +- tests/test-opt.c | 1 + tests/test-pool.c | 1 + tests/test-quantize-fns.cpp | 1 + tests/test-quantize-perf.cpp | 1 + tests/test0.c | 1 + tests/test1.c | 1 + tests/test2.c | 1 + tests/test3.c | 1 + 35 files changed, 581 insertions(+), 174 deletions(-) create mode 100644 include/ggml/pthreads.h diff --git a/examples/dolly-v2/main.cpp b/examples/dolly-v2/main.cpp index ce634213b..2c0eb6a00 100644 --- a/examples/dolly-v2/main.cpp +++ b/examples/dolly-v2/main.cpp @@ -231,6 +231,7 @@ bool dollyv2_model_load(const std::string & fname, dollyv2_model & model, gpt_vo /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -492,6 +493,7 @@ bool dollyv2_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git 
a/examples/dolly-v2/quantize.cpp b/examples/dolly-v2/quantize.cpp index 0c0d24ccf..f7bfbf531 100644 --- a/examples/dolly-v2/quantize.cpp +++ b/examples/dolly-v2/quantize.cpp @@ -139,7 +139,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/gpt-2/main.cpp b/examples/gpt-2/main.cpp index 7e12eab5f..08dfb59fe 100644 --- a/examples/gpt-2/main.cpp +++ b/examples/gpt-2/main.cpp @@ -203,6 +203,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -425,6 +426,7 @@ bool gpt2_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/gpt-2/quantize.cpp b/examples/gpt-2/quantize.cpp index 9d8d53a67..27ea72f0b 100644 --- a/examples/gpt-2/quantize.cpp +++ b/examples/gpt-2/quantize.cpp @@ -145,7 +145,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/gpt-j/main.cpp b/examples/gpt-j/main.cpp index b42764ce2..395a929b7 100644 --- a/examples/gpt-j/main.cpp +++ b/examples/gpt-j/main.cpp @@ -202,6 +202,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab & /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -421,6 +422,7 @@ bool gptj_eval( /*.mem_size =*/ buf_size, 
/*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/gpt-j/quantize.cpp b/examples/gpt-j/quantize.cpp index 437053b7d..62550a86c 100644 --- a/examples/gpt-j/quantize.cpp +++ b/examples/gpt-j/quantize.cpp @@ -143,7 +143,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/gpt-neox/main.cpp b/examples/gpt-neox/main.cpp index 4af771b84..ac2e3b90e 100644 --- a/examples/gpt-neox/main.cpp +++ b/examples/gpt-neox/main.cpp @@ -205,6 +205,7 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_ /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -472,6 +473,7 @@ bool gpt_neox_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/gpt-neox/quantize.cpp b/examples/gpt-neox/quantize.cpp index 96208c1e8..b0cc09fbf 100644 --- a/examples/gpt-neox/quantize.cpp +++ b/examples/gpt-neox/quantize.cpp @@ -139,7 +139,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/mnist/main-cpu.cpp b/examples/mnist/main-cpu.cpp index 2000c9aac..a554c0e77 100644 --- a/examples/mnist/main-cpu.cpp +++ b/examples/mnist/main-cpu.cpp @@ -51,6 +51,7 @@ int mnist_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct 
ggml_context * ctx_work = ggml_init(params); diff --git a/examples/mnist/main-mtl.cpp b/examples/mnist/main-mtl.cpp index a8d47ac9c..1047a3814 100644 --- a/examples/mnist/main-mtl.cpp +++ b/examples/mnist/main-mtl.cpp @@ -45,6 +45,7 @@ int mnist_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx_work = ggml_init(params); diff --git a/examples/mnist/main.cpp b/examples/mnist/main.cpp index 5ff4ac20f..afef54ad6 100644 --- a/examples/mnist/main.cpp +++ b/examples/mnist/main.cpp @@ -80,6 +80,7 @@ bool mnist_model_load(const std::string & fname, mnist_model & model) { /*.mem_size =*/ ctx_size + 1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -182,6 +183,7 @@ int mnist_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp index 457dc3d5b..1ab737a02 100644 --- a/examples/mpt/main.cpp +++ b/examples/mpt/main.cpp @@ -298,6 +298,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -495,6 +496,7 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past, /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/mpt/quantize.cpp b/examples/mpt/quantize.cpp index d0c9dda82..f4c900a2f 100644 --- a/examples/mpt/quantize.cpp +++ b/examples/mpt/quantize.cpp @@ -144,7 +144,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = {0, NULL, false}; + struct 
ggml_init_params params = {0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/replit/main.cpp b/examples/replit/main.cpp index aed7f268b..a1e3b383e 100644 --- a/examples/replit/main.cpp +++ b/examples/replit/main.cpp @@ -280,6 +280,7 @@ bool replit_model_load(const std::string & fname, replit_model & model, replit_t /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -472,6 +473,7 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/replit/quantize.cpp b/examples/replit/quantize.cpp index f274074bb..24bbf86ed 100644 --- a/examples/replit/quantize.cpp +++ b/examples/replit/quantize.cpp @@ -140,7 +140,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = {0, NULL, false}; + struct ggml_init_params params = {0, NULL, false, GGML_DEFAULT_N_THREADS}; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/starcoder/main.cpp b/examples/starcoder/main.cpp index d84e36634..5cfd9b8d0 100644 --- a/examples/starcoder/main.cpp +++ b/examples/starcoder/main.cpp @@ -226,6 +226,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -460,6 +461,7 @@ bool starcoder_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.n_threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/starcoder/quantize.cpp b/examples/starcoder/quantize.cpp index d3aee3f26..edafacb3f 100644 --- 
a/examples/starcoder/quantize.cpp +++ b/examples/starcoder/quantize.cpp @@ -145,7 +145,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/starcoder/starcoder-mmap.cpp b/examples/starcoder/starcoder-mmap.cpp index 094c441d8..b240abcae 100644 --- a/examples/starcoder/starcoder-mmap.cpp +++ b/examples/starcoder/starcoder-mmap.cpp @@ -352,6 +352,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -450,6 +451,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp c_params.mem_size = model.cache.buf.size; c_params.mem_buffer = model.cache.buf.addr; c_params.no_alloc = false; + c_params.n_threads = GGML_DEFAULT_N_THREADS; model.cache.ctx = ggml_init(c_params); @@ -667,6 +669,7 @@ bool starcoder_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, + /*.threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/examples/whisper/quantize.cpp b/examples/whisper/quantize.cpp index 64e8f35c3..82030d386 100644 --- a/examples/whisper/quantize.cpp +++ b/examples/whisper/quantize.cpp @@ -184,7 +184,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/whisper/whisper.cpp b/examples/whisper/whisper.cpp index cad40426c..de1f47bf5 100644 --- a/examples/whisper/whisper.cpp +++ b/examples/whisper/whisper.cpp @@ -741,6 +741,7 @@ static bool 
kv_cache_init( /*.mem_size =*/ cache.buf.size(), /*.mem_buffer =*/ cache.buf.data(), /*.no_alloc =*/ false, + /*.threads =*/ GGML_DEFAULT_N_THREADS, }; cache.ctx = ggml_init(params); @@ -777,6 +778,7 @@ static bool kv_cache_reinit(struct whisper_kv_cache & cache) { /*.mem_size =*/ cache.buf.size(), /*.mem_buffer =*/ cache.buf.data(), /*.no_alloc =*/ false, + /*.threads =*/ GGML_DEFAULT_N_THREADS, }; cache.ctx = ggml_init(params); @@ -1136,6 +1138,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con /*.mem_size =*/ wctx.model.buf->size(), /*.mem_buffer =*/ wctx.model.buf->data(), /*.no_alloc =*/ false, + /*.threads =*/ GGML_DEFAULT_N_THREADS, }; model.ctx = ggml_init(params); @@ -1456,6 +1459,7 @@ static bool whisper_encode_internal( /*.mem_size =*/ wstate.buf_compute.size(), /*.mem_buffer =*/ wstate.buf_compute.data(), /*.no_alloc =*/ false, + /*.threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); @@ -1935,6 +1939,7 @@ static bool whisper_decode_internal( /*.mem_size =*/ wstate.buf_compute.size(), /*.mem_buffer =*/ wstate.buf_compute.data(), /*.no_alloc =*/ false, + /*.threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(params); @@ -5084,6 +5089,7 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { /*.mem_size =*/ buf.size(), /*.mem_buffer =*/ buf.data(), /*.no_alloc =*/ false, + /*.threads =*/ n_threads, }; struct ggml_context * ctx0 = ggml_init(gparams); diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 24856a255..9c378fdd7 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -482,6 +482,7 @@ extern "C" { size_t mem_size; // bytes void * mem_buffer; // if NULL, memory will be allocated internally bool no_alloc; // don't allocate memory for the tensor data + int n_threads; // number of threads for the thread pool }; @@ -1350,7 +1351,7 @@ extern "C" { // ggml_graph_plan() has to be called before ggml_graph_compute() // when plan.work_size > 0, caller must 
allocate memory for plan.work_data GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); - GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); + GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan, void * tpool); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // same as ggml_graph_compute() but the work data is allocated as a part of the context diff --git a/include/ggml/pthreads.h b/include/ggml/pthreads.h new file mode 100644 index 000000000..c67fa3341 --- /dev/null +++ b/include/ggml/pthreads.h @@ -0,0 +1,448 @@ +#include <windows.h> +#include <stdbool.h> +#include <stdlib.h> + +static DWORD timespec_to_ms(const struct timespec* abstime) +{ + DWORD t; + + if (abstime == NULL) + return INFINITE; + + t = ((abstime->tv_sec - time(NULL)) * 1000) + (abstime->tv_nsec / 1000000); + if (t < 0) + t = 1; + return t; +} + +static void ms_to_timespec(struct timespec* ts, unsigned int ms) +{ + if (ts == NULL) + return; + ts->tv_sec = (ms / 1000) + time(NULL); + ts->tv_nsec = (ms % 1000) * 1000000; +} + +typedef HANDLE pthread_t; +typedef void pthread_attr_t; +typedef DWORD thread_ret_t; + +typedef struct { + void *(*start_routine)(void *); + void *start_arg; +} win_thread_start_t; + +static DWORD WINAPI win_thread_start(void *arg) +{ + win_thread_start_t *data = arg; + void *(*start_routine)(void *) = data->start_routine; + void *start_arg = data->start_arg; + + free(data); + + start_routine(start_arg); + return 0; /* ERROR_SUCCESS */ +} + +static int pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) +{ + win_thread_start_t *data; + + if (thread == NULL || start_routine == NULL) + return 1; + + data = malloc(sizeof(*data)); + data->start_routine = start_routine; + data->start_arg = arg; + + *thread = CreateThread(NULL, 0, win_thread_start, data, 0, NULL); + if (*thread == NULL) + return 1; + return 0; +} + 
+static int pthread_join(pthread_t thread, void **value_ptr) +{ + (void)value_ptr; + WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + return 0; +} + +static int pthread_detach(pthread_t thread) +{ + CloseHandle(thread); +} + +typedef CRITICAL_SECTION pthread_mutex_t; +typedef void pthread_mutexattr_t; + +static int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) +{ + (void)attr; + + if (mutex == NULL) + return 1; + + InitializeCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + if (mutex == NULL) + return 1; + DeleteCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + if (mutex == NULL) + return 1; + EnterCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + if (mutex == NULL) + return 1; + LeaveCriticalSection(mutex); + return 0; +} + +typedef CONDITION_VARIABLE pthread_cond_t; +typedef void pthread_condattr_t; + +#ifdef NEEDED +struct timespec { + long tv_sec; + long tv_nsec; +}; +#endif + +static int pthread_cond_init(pthread_cond_t *cond, pthread_condattr_t *attr) +{ + (void)attr; + if (cond == NULL) + return 1; + InitializeConditionVariable(cond); + return 0; +} + +static int pthread_cond_destroy(pthread_cond_t *cond) +{ + /* Windows does not have a destroy for conditionals */ + (void)cond; + return 0; +} + +static int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) +{ + if (cond == NULL || mutex == NULL) + return 1; + if (!SleepConditionVariableCS(cond, mutex, timespec_to_ms(abstime))) + return 1; + return 0; +} + +static int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + if (cond == NULL || mutex == NULL) + return 1; + return pthread_cond_timedwait(cond, mutex, NULL); +} + +static int pthread_cond_signal(pthread_cond_t *cond) +{ + if (cond == NULL) + return 1; + WakeConditionVariable(cond); + return 
0; +} + +static int pthread_cond_broadcast(pthread_cond_t *cond) +{ + if (cond == NULL) + return 1; + WakeAllConditionVariable(cond); + return 0; +} + +typedef struct { + SRWLOCK lock; + bool exclusive; +} pthread_rwlock_t; + +typedef void pthread_rwlockattr_t; + +static int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr) +{ + (void)attr; + if (rwlock == NULL) + return 1; + InitializeSRWLock(&rwlock->lock); + rwlock->exclusive = false; + return 0; +} + +static int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) +{ + (void)rwlock; +} + +static int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) +{ + if (rwlock == NULL) + return 1; + AcquireSRWLockShared(&rwlock->lock); +} + +static int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) +{ + if (rwlock == NULL) + return 1; + return !TryAcquireSRWLockShared(&rwlock->lock); +} + +static int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) +{ + if (rwlock == NULL) + return 1; + AcquireSRWLockExclusive(&rwlock->lock); + rwlock->exclusive = true; +} + +static int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) +{ + BOOLEAN ret; + + if (rwlock == NULL) + return 1; + + ret = TryAcquireSRWLockExclusive(&rwlock->lock); + if (ret) + rwlock->exclusive = true; + return ret; +} + +static int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) +{ + if (rwlock == NULL) + return 1; + + if (rwlock->exclusive) { + rwlock->exclusive = false; + ReleaseSRWLockExclusive(&rwlock->lock); + } else { + ReleaseSRWLockShared(&rwlock->lock); + } +} + +static unsigned int pcthread_get_num_procs() +{ + SYSTEM_INFO sysinfo; + + GetSystemInfo(&sysinfo); + return sysinfo.dwNumberOfProcessors; +} + +typedef void (*thread_func_t)(void *arg); + +struct tpool_work { + thread_func_t func; + void *arg; + struct tpool_work *next; +}; +typedef struct tpool_work tpool_work_t; + +static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) +{ + tpool_work_t *work; + + if (func == NULL) + return NULL; + + work = 
malloc(sizeof(*work)); + work->func = func; + work->arg = arg; + work->next = NULL; + return work; +} + +static void tpool_work_destroy(tpool_work_t *work) +{ + if (work == NULL) + return; + free(work); +} + +struct tpool { + tpool_work_t *work_first; + tpool_work_t *work_last; + pthread_mutex_t work_mutex; + pthread_cond_t work_cond; + pthread_cond_t working_cond; + size_t working_cnt; + size_t thread_cnt; + bool stop; +}; +typedef struct tpool tpool_t; + +static tpool_work_t *tpool_work_get(tpool_t *tm) +{ + tpool_work_t *work; + + if (tm == NULL) + return NULL; + + work = tm->work_first; + if (work == NULL) + return NULL; + + if (work->next == NULL) { + tm->work_first = NULL; + tm->work_last = NULL; + } else { + tm->work_first = work->next; + } + + return work; +} + +static void *tpool_worker(void *arg) +{ + tpool_t *tm = arg; + tpool_work_t *work; + + // printf("pthreads %p starts\n", arg); + while (1) { + pthread_mutex_lock(&tm->work_mutex); + while (tm->work_first == NULL && !tm->stop) + pthread_cond_wait(&tm->work_cond, &tm->work_mutex); + if (tm->stop) + break; + work = tpool_work_get(tm); + tm->working_cnt++; + pthread_mutex_unlock(&tm->work_mutex); + + // printf("pthreads %p works\n", arg); + if (work != NULL) { + work->func(work->arg); + tpool_work_destroy(work); + } + // printf("pthreads %p waits\n", arg); + + pthread_mutex_lock(&tm->work_mutex); + tm->working_cnt--; + if (tm->working_cnt == 0 && tm->work_first == NULL) + pthread_cond_signal(&tm->working_cond); + pthread_mutex_unlock(&tm->work_mutex); + } + + // printf("pthreads %p stops\n", arg); + tm->thread_cnt--; + pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_signal(&tm->working_cond); + return NULL; +} + +static tpool_t *tpool_create(size_t num) +{ + tpool_t *tm; + pthread_t thread; + size_t i; + + if (num == 0) + num = 2; + + tm = calloc(1, sizeof(*tm)); + tm->thread_cnt = num; + + pthread_mutex_init(&tm->work_mutex, NULL); + pthread_cond_init(&tm->work_cond, NULL); + 
pthread_cond_init(&tm->working_cond, NULL); + + tm->work_first = NULL; + tm->work_last = NULL; + + for (i=0; i<num; i++) { + pthread_create(&thread, NULL, tpool_worker, tm); + pthread_detach(thread); + } + + return tm; +} + +static void tpool_wait(tpool_t *tm) +{ + if (tm == NULL) + return; + + pthread_mutex_lock(&tm->work_mutex); + while (tm->working_cnt != 0 || tm->work_first != NULL) { + pthread_cond_wait(&tm->working_cond, &tm->work_mutex); + } + pthread_mutex_unlock(&tm->work_mutex); +} + +static void tpool_destroy(tpool_t *tm) +{ + tpool_work_t *work; + tpool_work_t *work2; + + if (tm == NULL) + return; + + pthread_mutex_lock(&tm->work_mutex); + work = tm->work_first; + while (work != NULL) { + work2 = work->next; + tpool_work_destroy(work); + work = work2; + } + tm->stop = true; + pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_broadcast(&tm->work_cond); + + tpool_wait(tm); + + pthread_mutex_lock(&tm->work_mutex); + while (tm->thread_cnt > 0) + pthread_cond_wait(&tm->working_cond, &tm->work_mutex); + pthread_mutex_unlock(&tm->work_mutex); + + pthread_mutex_destroy(&tm->work_mutex); + pthread_cond_destroy(&tm->work_cond); + pthread_cond_destroy(&tm->working_cond); + + free(tm); +} + +static bool tpool_add_work(tpool_t *tm, thread_func_t func, void *arg) +{ + tpool_work_t *work; + + if (tm == NULL) + return false; + + work = tpool_work_create(func, arg); + if (work == NULL) + return false; + + pthread_mutex_lock(&tm->work_mutex); + if (tm->work_first == NULL) { + tm->work_first = work; + tm->work_last = tm->work_first; + } else { + tm->work_last->next = work; + tm->work_last = work; + } + + pthread_cond_broadcast(&tm->work_cond); + pthread_mutex_unlock(&tm->work_mutex); + + return true; +} diff --git a/src/ggml.c b/src/ggml.c index c56a3d0e0..cf5f18416 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -69,30 +69,13 @@ static LONG atomic_fetch_sub(atomic_int * ptr, LONG dec) { return atomic_fetch_add(ptr, -(dec)); } -typedef HANDLE pthread_t; - -typedef DWORD thread_ret_t; -static int pthread_create(pthread_t * out, void * unused, thread_ret_t(*func)(void *), void * arg) { - (void) unused; - HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, 
NULL); - if (handle == NULL) - { - return EAGAIN; - } - - *out = handle; - return 0; -} - -static int pthread_join(pthread_t thread, void * unused) { - (void) unused; - return (int) WaitForSingleObject(thread, INFINITE); -} - static int sched_yield (void) { Sleep (0); return 0; } + +#include "pthreads.h" + #else #include <pthread.h> #include <stdatomic.h> @@ -119,7 +102,7 @@ typedef void * thread_ret_t; #endif /*#define GGML_PERF*/ -#define GGML_DEBUG 0 +#define GGML_DEBUG 10 #define GGML_GELU_FP16 #define GGML_GELU_QUICK_FP16 #define GGML_SILU_FP16 @@ -250,6 +233,7 @@ inline static void* ggml_aligned_malloc(size_t size) { #include "ggml-opencl.h" #endif #elif defined(GGML_USE_OPENBLAS) +#define GGML_BLAS_USE_MKL #if defined(GGML_BLAS_USE_MKL) #include <mkl.h> #else @@ -3948,6 +3932,8 @@ struct ggml_context { struct ggml_scratch scratch; struct ggml_scratch scratch_save; + + tpool_t * tpool; }; struct ggml_context_container { @@ -4389,6 +4375,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { /*.objects_end =*/ NULL, /*.scratch =*/ { 0, 0, NULL, }, /*.scratch_save =*/ { 0, 0, NULL, }, + /*.tpool =*/ tpool_create(params.n_threads - 1), }; GGML_ASSERT(ctx->mem_buffer != NULL); @@ -4410,6 +4397,7 @@ void ggml_free(struct ggml_context * ctx) { for (int i = 0; i < GGML_MAX_CONTEXTS; i++) { if (&g_state.contexts[i].context == ctx) { + tpool_destroy(g_state.contexts[i].context.tpool); g_state.contexts[i].used = false; GGML_PRINT_DEBUG("%s: context %d has been freed. 
memory used = %zu\n", @@ -16259,9 +16247,7 @@ struct ggml_compute_state_shared { const int n_threads; - // synchronization primitives - atomic_int n_active; // num active threads - atomic_int node_n; // active graph node + int node_n; // active graph node bool (*abort_callback)(void * data); // abort ggml_graph_compute when true void * abort_callback_data; @@ -16282,116 +16268,26 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const node->perf_time_us += time_us_cur; } -static thread_ret_t ggml_graph_compute_thread(void * data) { +static void ggml_graph_compute_thread(void * data) { struct ggml_compute_state * state = (struct ggml_compute_state *) data; const struct ggml_cgraph * cgraph = state->shared->cgraph; const struct ggml_cplan * cplan = state->shared->cplan; - const int * n_tasks_arr = cplan->n_tasks; - const int n_threads = state->shared->n_threads; + int n_threads = state->shared->n_threads; + int node_n = state->shared->node_n; set_numa_thread_affinity(state->ith, n_threads); - int node_n = -1; - - while (true) { - if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { - state->shared->node_n += 1; - return (thread_ret_t) GGML_EXIT_ABORTED; - } - if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) { - // all other threads are finished and spinning - // do finalize and init here so we don't have synchronize again - struct ggml_compute_params params = { - /*.type =*/ GGML_TASK_FINALIZE, - /*.ith =*/ 0, - /*.nth =*/ 0, - /*.wsize =*/ cplan->work_size, - /*.wdata =*/ cplan->work_data, - }; - - if (node_n != -1) { - /* FINALIZE */ - struct ggml_tensor * node = state->shared->cgraph->nodes[node_n]; - if (GGML_OP_HAS_FINALIZE[node->op]) { - params.nth = n_tasks_arr[node_n]; - ggml_compute_forward(¶ms, node); - } - ggml_graph_compute_perf_stats_node(node, state->shared); - } - - // distribute new work or execute it direct if 1T - while (++node_n < cgraph->n_nodes) { - GGML_PRINT_DEBUG_5("%s: %d/%d\n", 
__func__, node_n, cgraph->n_nodes); - - struct ggml_tensor * node = cgraph->nodes[node_n]; - const int n_tasks = n_tasks_arr[node_n]; - - state->shared->perf_node_start_cycles = ggml_perf_cycles(); - state->shared->perf_node_start_time_us = ggml_perf_time_us(); - - params.nth = n_tasks; - - /* INIT */ - if (GGML_OP_HAS_INIT[node->op]) { - params.type = GGML_TASK_INIT; - ggml_compute_forward(¶ms, node); - } - - if (n_tasks == 1) { - // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1, - // they do something more efficient than spinning (?) - params.type = GGML_TASK_COMPUTE; - ggml_compute_forward(¶ms, node); - - if (GGML_OP_HAS_FINALIZE[node->op]) { - params.type = GGML_TASK_FINALIZE; - ggml_compute_forward(¶ms, node); - } - - ggml_graph_compute_perf_stats_node(node, state->shared); - } else { - break; - } - - if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { - break; - } - } - - atomic_store(&state->shared->n_active, n_threads); - atomic_store(&state->shared->node_n, node_n); - } else { - // wait for other threads to finish - const int last = node_n; - do { - //sched_yield(); - node_n = atomic_load(&state->shared->node_n); - } while (node_n == last); - } - - // check if we should stop - if (node_n >= cgraph->n_nodes) break; - - /* COMPUTE */ - struct ggml_tensor * node = cgraph->nodes[node_n]; - const int n_tasks = n_tasks_arr[node_n]; - - struct ggml_compute_params params = { - /*.type =*/ GGML_TASK_COMPUTE, - /*.ith =*/ state->ith, - /*.nth =*/ n_tasks, - /*.wsize =*/ cplan->work_size, - /*.wdata =*/ cplan->work_data, - }; - - if (state->ith < n_tasks) { - ggml_compute_forward(¶ms, node); - } - } + struct ggml_compute_params params = { + /*.type =*/ GGML_TASK_COMPUTE, + /*.ith =*/ state->ith, + /*.nth =*/ cplan->n_tasks[node_n], + /*.wsize =*/ cplan->work_size, + /*.wdata =*/ cplan->work_data, + }; - return GGML_EXIT_SUCCESS; + ggml_compute_forward(¶ms, cgraph->nodes[node_n]); } struct ggml_cplan 
ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { @@ -16737,7 +16633,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { return cplan; } -int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { +int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan, void * tpool) { { GGML_ASSERT(cplan); GGML_ASSERT(cplan->n_threads > 0); @@ -16756,59 +16652,80 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { const int n_threads = cplan->n_threads; struct ggml_compute_state_shared state_shared = { - /*.cgraph =*/ cgraph, - /*.cgraph_plan =*/ cplan, - /*.perf_node_start_cycles =*/ 0, - /*.perf_node_start_time_us =*/ 0, - /*.n_threads =*/ n_threads, - /*.n_active =*/ n_threads, - /*.node_n =*/ -1, - /*.abort_callback =*/ NULL, - /*.abort_callback_data =*/ NULL, + .cgraph = cgraph, + .cplan = cplan, + .perf_node_start_cycles = 0, + .perf_node_start_time_us = 0, + .n_threads = n_threads, + .node_n = -1, + .abort_callback = NULL, + .abort_callback_data = NULL, }; struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads); - // create thread pool - if (n_threads > 1) { - for (int j = 1; j < n_threads; ++j) { - workers[j] = (struct ggml_compute_state) { - .thrd = 0, - .ith = j, - .shared = &state_shared, - }; + const int * n_tasks_arr = cplan->n_tasks; - const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]); - GGML_ASSERT(rc == 0); - } + // create thread pool arguments + for (int j = 1; j < n_threads; ++j) { + workers[j] = (struct ggml_compute_state) { + .thrd = 0, + .ith = j, + .shared = &state_shared, + }; } workers[0].ith = 0; workers[0].shared = &state_shared; - const int64_t perf_start_cycles = ggml_perf_cycles(); - const int64_t perf_start_time_us = ggml_perf_time_us(); + int compute_status = GGML_EXIT_SUCCESS; + for (int node_n = 0; node_n < cgraph->n_nodes; ++ node_n) { + if 
(cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { + compute_status = GGML_EXIT_ABORTED; + break; + } + + struct ggml_tensor* node = cgraph->nodes[node_n]; + + state_shared.perf_node_start_cycles = ggml_perf_cycles(); + state_shared.perf_node_start_time_us = ggml_perf_time_us(); + + struct ggml_compute_params params = { + /*.type =*/ -1, + /*.ith =*/ 0, + /*.nth =*/ n_tasks_arr[node_n], + /*.wsize =*/ cplan->work_size, + /*.wdata =*/ cplan->work_data, + }; - // this is a work thread too - int compute_status = (size_t) ggml_graph_compute_thread(&workers[0]); + if (GGML_OP_HAS_INIT[node->op]) { + params.type = GGML_TASK_INIT; + ggml_compute_forward(¶ms, node); + } - // don't leave affinity set on the main thread - clear_numa_thread_affinity(); + if (n_tasks_arr[node_n] > 1) { + state_shared.node_n = node_n; + for (int j = 1; j < n_tasks_arr[node_n]; ++j) + tpool_add_work(tpool, ggml_graph_compute_thread, &workers[j]); + } - // join or kill thread pool - if (n_threads > 1) { - for (int j = 1; j < n_threads; j++) { - const int rc = ggml_thread_join(workers[j].thrd, NULL); - GGML_ASSERT(rc == 0); + params.type = GGML_TASK_COMPUTE; + ggml_compute_forward(¶ms, node); + + if (n_tasks_arr[node_n] > 1) { + tpool_wait(tpool); } + + if (GGML_OP_HAS_FINALIZE[node->op]) { + params.type = GGML_TASK_FINALIZE; + ggml_compute_forward(¶ms, node); + } + + ggml_graph_compute_perf_stats_node(node, &state_shared); } // performance stats (graph) { - int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles; - int64_t perf_time_us_cur = ggml_perf_time_us() - perf_start_time_us; - - cgraph->perf_runs++; - cgraph->perf_cycles += perf_cycles_cur; - cgraph->perf_time_us += perf_time_us_cur; + int64_t perf_cycles_cur = ggml_perf_cycles() - state_shared.perf_node_start_cycles; + int64_t perf_time_us_cur = ggml_perf_time_us() - state_shared.perf_node_start_time_us; GGML_PRINT_DEBUG("%s: perf (%d) - cpu = %.3f / %.3f ms, wall = %.3f / %.3f ms\n", __func__, 
cgraph->perf_runs, @@ -16839,7 +16756,7 @@ void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cplan.work_data = buf->data; - ggml_graph_compute(cgraph, &cplan); + ggml_graph_compute(cgraph, &cplan, ctx->tpool); } struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) { @@ -17105,6 +17022,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** .mem_size = fsize + overhead, .mem_buffer = NULL, .no_alloc = false, + .n_threads = 1, }; *ctx_data = ggml_init(params); @@ -17163,6 +17081,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** .mem_size = size_eval + overhead, .mem_buffer = NULL, .no_alloc = true, + .n_threads = 1, }; *ctx_eval = ggml_init(params); @@ -18263,6 +18182,7 @@ enum ggml_opt_result ggml_opt( .mem_size = 16*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = 1, }; ctx = ggml_init(params_ctx); diff --git a/tests/test-blas0.c b/tests/test-blas0.c index 0977d3ef8..5bf5d49f1 100644 --- a/tests/test-blas0.c +++ b/tests/test-blas0.c @@ -69,6 +69,7 @@ int main(int argc, const char ** argv) { .mem_size = 2048ul*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/tests/test-grad0.c b/tests/test-grad0.c index 01467bc18..2186f5cb9 100644 --- a/tests/test-grad0.c +++ b/tests/test-grad0.c @@ -345,6 +345,7 @@ int main(int argc, const char ** argv) { .mem_size = 128*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; int64_t ne[4]; diff --git a/tests/test-mul-mat0.c b/tests/test-mul-mat0.c index 1bd6e140b..85d510da4 100644 --- a/tests/test-mul-mat0.c +++ b/tests/test-mul-mat0.c @@ -235,6 +235,7 @@ int main(int argc, const char ** argv) { .mem_size = 128*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; int64_t ne[4]; diff --git 
a/tests/test-mul-mat2.c b/tests/test-mul-mat2.c index ad30492b4..264434361 100644 --- a/tests/test-mul-mat2.c +++ b/tests/test-mul-mat2.c @@ -2375,7 +2375,7 @@ int main(int argc, const char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false }; + struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/tests/test-opt.c b/tests/test-opt.c index 5531814c4..b24f74647 100644 --- a/tests/test-opt.c +++ b/tests/test-opt.c @@ -122,6 +122,7 @@ int main(void) { .mem_size = 1024*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = 1, }; struct ggml_context * ctx = ggml_init(params); diff --git a/tests/test-pool.c b/tests/test-pool.c index cdf00f4ec..1c5dd3e67 100644 --- a/tests/test-pool.c +++ b/tests/test-pool.c @@ -7,6 +7,7 @@ struct ggml_context* make_ctx(void) { struct ggml_init_params params = { .mem_size = 2 * 1024 * 1024, + .n_threads = GGML_DEFAULT_N_THREADS, }; return ggml_init(params); diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 8d3c162d2..23de47dbd 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -117,6 +117,7 @@ int main(int argc, char * argv[]) { /* .mem_size = */ 1*1024, /* .mem_buffer = */ NULL, /* .no_alloc = */ true, + /* .n_threads = */ GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx = ggml_init(ggml_params); diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 0bb9537f6..bfea4d926 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -266,6 +266,7 @@ int main(int argc, char * argv[]) { /* .mem_size = */ 1*1024, /* .mem_buffer = */ NULL, /* .no_alloc = */ true, + /* .n_threads = */ GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx = ggml_init(ggml_params); diff --git a/tests/test0.c b/tests/test0.c index 7fba63e77..74b75f00e 100644 --- a/tests/test0.c +++ b/tests/test0.c @@ -8,6 
+8,7 @@ int main(int argc, const char ** argv) { .mem_size = 128*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/tests/test1.c b/tests/test1.c index c313bf8e1..ae67a1f41 100644 --- a/tests/test1.c +++ b/tests/test1.c @@ -10,6 +10,7 @@ int main(int argc, const char ** argv) { .mem_size = 128*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx0 = ggml_init(params); diff --git a/tests/test2.c b/tests/test2.c index 839e3e6de..763453fce 100644 --- a/tests/test2.c +++ b/tests/test2.c @@ -18,6 +18,7 @@ int main(int argc, const char ** argv) { .mem_size = 128*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; //struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM); diff --git a/tests/test3.c b/tests/test3.c index b92d6233d..f18c8984a 100644 --- a/tests/test3.c +++ b/tests/test3.c @@ -13,6 +13,7 @@ int main(int argc, const char ** argv) { .mem_size = 1024*1024*1024, .mem_buffer = NULL, .no_alloc = false, + .n_threads = GGML_DEFAULT_N_THREADS, }; //struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM); From 6198dca2445f8a305798d62c3b1be644deb08f53 Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 13:27:58 +0200 Subject: [PATCH 2/9] Some cleanup --- examples/dolly-v2/main.cpp | 2 +- examples/dolly-v2/quantize.cpp | 2 +- examples/gpt-2/main.cpp | 2 +- examples/gpt-2/quantize.cpp | 2 +- examples/gpt-j/main.cpp | 2 +- examples/gpt-j/quantize.cpp | 2 +- examples/gpt-neox/main.cpp | 2 +- examples/gpt-neox/quantize.cpp | 2 +- examples/mnist/main-mtl.cpp | 2 +- examples/mnist/main.cpp | 2 +- examples/mpt/main.cpp | 2 +- examples/mpt/quantize.cpp | 2 +- examples/replit/main.cpp | 2 +- examples/replit/quantize.cpp | 2 +- examples/starcoder/main.cpp | 2 +- examples/starcoder/quantize.cpp | 2 +- 
examples/starcoder/starcoder-mmap.cpp | 4 ++-- examples/whisper/quantize.cpp | 2 +- examples/whisper/whisper.cpp | 6 +++--- src/ggml.c | 5 ++--- tests/test-opt.c | 2 +- 21 files changed, 25 insertions(+), 26 deletions(-) diff --git a/examples/dolly-v2/main.cpp b/examples/dolly-v2/main.cpp index 2c0eb6a00..b3a7d4477 100644 --- a/examples/dolly-v2/main.cpp +++ b/examples/dolly-v2/main.cpp @@ -231,7 +231,7 @@ bool dollyv2_model_load(const std::string & fname, dollyv2_model & model, gpt_vo /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/dolly-v2/quantize.cpp b/examples/dolly-v2/quantize.cpp index f7bfbf531..56c423d58 100644 --- a/examples/dolly-v2/quantize.cpp +++ b/examples/dolly-v2/quantize.cpp @@ -139,7 +139,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = { 0, NULL, false, 1 }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/gpt-2/main.cpp b/examples/gpt-2/main.cpp index 08dfb59fe..61fbc6484 100644 --- a/examples/gpt-2/main.cpp +++ b/examples/gpt-2/main.cpp @@ -203,7 +203,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/gpt-2/quantize.cpp b/examples/gpt-2/quantize.cpp index 27ea72f0b..5c4a2359e 100644 --- a/examples/gpt-2/quantize.cpp +++ b/examples/gpt-2/quantize.cpp @@ -145,7 +145,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = { 0, NULL, false, 1 }; struct ggml_context * ctx 
= ggml_init(params); ggml_free(ctx); } diff --git a/examples/gpt-j/main.cpp b/examples/gpt-j/main.cpp index 395a929b7..76ced4a66 100644 --- a/examples/gpt-j/main.cpp +++ b/examples/gpt-j/main.cpp @@ -202,7 +202,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab & /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/gpt-j/quantize.cpp b/examples/gpt-j/quantize.cpp index 62550a86c..0dd5c0367 100644 --- a/examples/gpt-j/quantize.cpp +++ b/examples/gpt-j/quantize.cpp @@ -143,7 +143,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = { 0, NULL, false, 1 }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/gpt-neox/main.cpp b/examples/gpt-neox/main.cpp index ac2e3b90e..84755da6f 100644 --- a/examples/gpt-neox/main.cpp +++ b/examples/gpt-neox/main.cpp @@ -205,7 +205,7 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_ /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/gpt-neox/quantize.cpp b/examples/gpt-neox/quantize.cpp index b0cc09fbf..1a07be2d2 100644 --- a/examples/gpt-neox/quantize.cpp +++ b/examples/gpt-neox/quantize.cpp @@ -139,7 +139,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = { 0, NULL, false, 1 }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/mnist/main-mtl.cpp b/examples/mnist/main-mtl.cpp index 1047a3814..41b112b5f 100644 --- a/examples/mnist/main-mtl.cpp +++ 
b/examples/mnist/main-mtl.cpp @@ -45,7 +45,7 @@ int mnist_eval( /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; struct ggml_context * ctx_work = ggml_init(params); diff --git a/examples/mnist/main.cpp b/examples/mnist/main.cpp index afef54ad6..6eb8419c8 100644 --- a/examples/mnist/main.cpp +++ b/examples/mnist/main.cpp @@ -80,7 +80,7 @@ bool mnist_model_load(const std::string & fname, mnist_model & model) { /*.mem_size =*/ ctx_size + 1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp index 1ab737a02..6294185fe 100644 --- a/examples/mpt/main.cpp +++ b/examples/mpt/main.cpp @@ -298,7 +298,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/mpt/quantize.cpp b/examples/mpt/quantize.cpp index f4c900a2f..45c3eaf66 100644 --- a/examples/mpt/quantize.cpp +++ b/examples/mpt/quantize.cpp @@ -144,7 +144,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = {0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = {0, NULL, false, 1 }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/replit/main.cpp b/examples/replit/main.cpp index a1e3b383e..59929dd8c 100644 --- a/examples/replit/main.cpp +++ b/examples/replit/main.cpp @@ -280,7 +280,7 @@ bool replit_model_load(const std::string & fname, replit_model & model, replit_t /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); 
diff --git a/examples/replit/quantize.cpp b/examples/replit/quantize.cpp index 24bbf86ed..329e5b56a 100644 --- a/examples/replit/quantize.cpp +++ b/examples/replit/quantize.cpp @@ -140,7 +140,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = {0, NULL, false, GGML_DEFAULT_N_THREADS}; + struct ggml_init_params params = {0, NULL, false, 1}; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/starcoder/main.cpp b/examples/starcoder/main.cpp index 5cfd9b8d0..5686e1196 100644 --- a/examples/starcoder/main.cpp +++ b/examples/starcoder/main.cpp @@ -226,7 +226,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/examples/starcoder/quantize.cpp b/examples/starcoder/quantize.cpp index edafacb3f..b95e24939 100644 --- a/examples/starcoder/quantize.cpp +++ b/examples/starcoder/quantize.cpp @@ -145,7 +145,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = { 0, NULL, false, 1 }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/starcoder/starcoder-mmap.cpp b/examples/starcoder/starcoder-mmap.cpp index b240abcae..8c606833d 100644 --- a/examples/starcoder/starcoder-mmap.cpp +++ b/examples/starcoder/starcoder-mmap.cpp @@ -352,7 +352,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, - /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.n_threads =*/ 1, }; model.ctx = ggml_init(params); @@ -451,7 +451,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp c_params.mem_size 
= model.cache.buf.size; c_params.mem_buffer = model.cache.buf.addr; c_params.no_alloc = false; - c_params.n_threads = GGML_DEFAULT_N_THREADS; + c_params.n_threads = 1; model.cache.ctx = ggml_init(c_params); diff --git a/examples/whisper/quantize.cpp b/examples/whisper/quantize.cpp index 82030d386..d4258dbb3 100644 --- a/examples/whisper/quantize.cpp +++ b/examples/whisper/quantize.cpp @@ -184,7 +184,7 @@ int main(int argc, char ** argv) { // needed to initialize f16 tables { - struct ggml_init_params params = { 0, NULL, false, GGML_DEFAULT_N_THREADS }; + struct ggml_init_params params = { 0, NULL, false, 1 }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } diff --git a/examples/whisper/whisper.cpp b/examples/whisper/whisper.cpp index de1f47bf5..ca546f638 100644 --- a/examples/whisper/whisper.cpp +++ b/examples/whisper/whisper.cpp @@ -741,7 +741,7 @@ static bool kv_cache_init( /*.mem_size =*/ cache.buf.size(), /*.mem_buffer =*/ cache.buf.data(), /*.no_alloc =*/ false, - /*.threads =*/ GGML_DEFAULT_N_THREADS, + /*.threads =*/ 1, }; cache.ctx = ggml_init(params); @@ -778,7 +778,7 @@ static bool kv_cache_reinit(struct whisper_kv_cache & cache) { /*.mem_size =*/ cache.buf.size(), /*.mem_buffer =*/ cache.buf.data(), /*.no_alloc =*/ false, - /*.threads =*/ GGML_DEFAULT_N_THREADS, + /*.threads =*/ 1, }; cache.ctx = ggml_init(params); @@ -1138,7 +1138,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con /*.mem_size =*/ wctx.model.buf->size(), /*.mem_buffer =*/ wctx.model.buf->data(), /*.no_alloc =*/ false, - /*.threads =*/ GGML_DEFAULT_N_THREADS, + /*.threads =*/ 1, }; model.ctx = ggml_init(params); diff --git a/src/ggml.c b/src/ggml.c index cf5f18416..2974afca4 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -102,7 +102,7 @@ typedef void * thread_ret_t; #endif /*#define GGML_PERF*/ -#define GGML_DEBUG 10 +#define GGML_DEBUG 0 #define GGML_GELU_FP16 #define GGML_GELU_QUICK_FP16 #define GGML_SILU_FP16 @@ -233,7 +233,6 @@ inline 
static void* ggml_aligned_malloc(size_t size) { #include "ggml-opencl.h" #endif #elif defined(GGML_USE_OPENBLAS) -#define GGML_BLAS_USE_MKL #if defined(GGML_BLAS_USE_MKL) #include #else @@ -18182,7 +18181,7 @@ enum ggml_opt_result ggml_opt( .mem_size = 16*1024*1024, .mem_buffer = NULL, .no_alloc = false, - .n_threads = 1, + .n_threads = GGML_DEFAULT_N_THREADS, }; ctx = ggml_init(params_ctx); diff --git a/tests/test-opt.c b/tests/test-opt.c index b24f74647..eae8273fc 100644 --- a/tests/test-opt.c +++ b/tests/test-opt.c @@ -122,7 +122,7 @@ int main(void) { .mem_size = 1024*1024*1024, .mem_buffer = NULL, .no_alloc = false, - .n_threads = 1, + .n_threads = GGML_DEFAULT_N_THREADS, }; struct ggml_context * ctx = ggml_init(params); From 2b8dcb781f81bb53271329a8c754335af7ba0882 Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 14:00:53 +0200 Subject: [PATCH 3/9] Possible fix for non Windows OS --- include/ggml/pthreads.h | 199 +-------------------------------------- include/ggml/tpool.h | 201 ++++++++++++++++++++++++++++++++++++++++ src/ggml.c | 4 +- 3 files changed, 205 insertions(+), 199 deletions(-) create mode 100644 include/ggml/tpool.h diff --git a/include/ggml/pthreads.h b/include/ggml/pthreads.h index c67fa3341..bb0e7dcf3 100644 --- a/include/ggml/pthreads.h +++ b/include/ggml/pthreads.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -249,200 +251,3 @@ static unsigned int pcthread_get_num_procs() return sysinfo.dwNumberOfProcessors; } -typedef void (*thread_func_t)(void *arg); - -struct tpool_work { - thread_func_t func; - void *arg; - struct tpool_work *next; -}; -typedef struct tpool_work tpool_work_t; - -static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) -{ - tpool_work_t *work; - - if (func == NULL) - return NULL; - - work = malloc(sizeof(*work)); - work->func = func; - work->arg = arg; - work->next = NULL; - return work; -} - -static void tpool_work_destroy(tpool_work_t *work) -{ - if (work == NULL) - return; - 
free(work); -} - -struct tpool { - tpool_work_t *work_first; - tpool_work_t *work_last; - pthread_mutex_t work_mutex; - pthread_cond_t work_cond; - pthread_cond_t working_cond; - size_t working_cnt; - size_t thread_cnt; - bool stop; -}; -typedef struct tpool tpool_t; - -static tpool_work_t *tpool_work_get(tpool_t *tm) -{ - tpool_work_t *work; - - if (tm == NULL) - return NULL; - - work = tm->work_first; - if (work == NULL) - return NULL; - - if (work->next == NULL) { - tm->work_first = NULL; - tm->work_last = NULL; - } else { - tm->work_first = work->next; - } - - return work; -} - -static void *tpool_worker(void *arg) -{ - tpool_t *tm = arg; - tpool_work_t *work; - - // printf("pthreads %p starts\n", arg); - while (1) { - pthread_mutex_lock(&tm->work_mutex); - while (tm->work_first == NULL && !tm->stop) - pthread_cond_wait(&tm->work_cond, &tm->work_mutex); - if (tm->stop) - break; - work = tpool_work_get(tm); - tm->working_cnt++; - pthread_mutex_unlock(&tm->work_mutex); - - // printf("pthreads %p works\n", arg); - if (work != NULL) { - work->func(work->arg); - tpool_work_destroy(work); - } - // printf("pthreads %p waits\n", arg); - - pthread_mutex_lock(&tm->work_mutex); - tm->working_cnt--; - if (tm->working_cnt == 0 && tm->work_first == NULL) - pthread_cond_signal(&tm->working_cond); - pthread_mutex_unlock(&tm->work_mutex); - } - - // printf("pthreads %p stops\n", arg); - tm->thread_cnt--; - pthread_mutex_unlock(&tm->work_mutex); - pthread_cond_signal(&tm->working_cond); - return NULL; -} - -static tpool_t *tpool_create(size_t num) -{ - tpool_t *tm; - pthread_t thread; - size_t i; - - if (num == 0) - num = 2; - - tm = calloc(1, sizeof(*tm)); - tm->thread_cnt = num; - - pthread_mutex_init(&tm->work_mutex, NULL); - pthread_cond_init(&tm->work_cond, NULL); - pthread_cond_init(&tm->working_cond, NULL); - - tm->work_first = NULL; - tm->work_last = NULL; - - for (i=0; iwork_mutex); - while (tm->working_cnt != 0 || tm->work_first != NULL) { - 
pthread_cond_wait(&tm->working_cond, &tm->work_mutex); - } - pthread_mutex_unlock(&tm->work_mutex); -} - -static void tpool_destroy(tpool_t *tm) -{ - tpool_work_t *work; - tpool_work_t *work2; - - if (tm == NULL) - return; - - pthread_mutex_lock(&tm->work_mutex); - work = tm->work_first; - while (work != NULL) { - work2 = work->next; - tpool_work_destroy(work); - work = work2; - } - tm->stop = true; - pthread_mutex_unlock(&tm->work_mutex); - pthread_cond_broadcast(&tm->work_cond); - - tpool_wait(tm); - - pthread_mutex_lock(&tm->work_mutex); - while (tm->thread_cnt > 0) - pthread_cond_wait(&tm->working_cond, &tm->work_mutex); - pthread_mutex_unlock(&tm->work_mutex); - - pthread_mutex_destroy(&tm->work_mutex); - pthread_cond_destroy(&tm->work_cond); - pthread_cond_destroy(&tm->working_cond); - - free(tm); -} - -static bool tpool_add_work(tpool_t *tm, thread_func_t func, void *arg) -{ - tpool_work_t *work; - - if (tm == NULL) - return false; - - work = tpool_work_create(func, arg); - if (work == NULL) - return false; - - pthread_mutex_lock(&tm->work_mutex); - if (tm->work_first == NULL) { - tm->work_first = work; - tm->work_last = tm->work_first; - } else { - tm->work_last->next = work; - tm->work_last = work; - } - - pthread_cond_broadcast(&tm->work_cond); - pthread_mutex_unlock(&tm->work_mutex); - - return true; -} diff --git a/include/ggml/tpool.h b/include/ggml/tpool.h new file mode 100644 index 000000000..a7cdf96db --- /dev/null +++ b/include/ggml/tpool.h @@ -0,0 +1,201 @@ +#pragma once + +#include "pthreads.h" + +typedef void (*thread_func_t)(void *arg); + +struct tpool_work { + thread_func_t func; + void *arg; + struct tpool_work *next; +}; +typedef struct tpool_work tpool_work_t; + +static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) +{ + tpool_work_t *work; + + if (func == NULL) + return NULL; + + work = malloc(sizeof(*work)); + work->func = func; + work->arg = arg; + work->next = NULL; + return work; +} + +static void 
tpool_work_destroy(tpool_work_t *work) +{ + if (work == NULL) + return; + free(work); +} + +struct tpool { + tpool_work_t *work_first; + tpool_work_t *work_last; + pthread_mutex_t work_mutex; + pthread_cond_t work_cond; + pthread_cond_t working_cond; + size_t working_cnt; + size_t thread_cnt; + bool stop; +}; +typedef struct tpool tpool_t; + +static tpool_work_t *tpool_work_get(tpool_t *tm) +{ + tpool_work_t *work; + + if (tm == NULL) + return NULL; + + work = tm->work_first; + if (work == NULL) + return NULL; + + if (work->next == NULL) { + tm->work_first = NULL; + tm->work_last = NULL; + } else { + tm->work_first = work->next; + } + + return work; +} + +static void *tpool_worker(void *arg) +{ + tpool_t *tm = arg; + tpool_work_t *work; + + // printf("pthreads %p starts\n", arg); + while (1) { + pthread_mutex_lock(&tm->work_mutex); + while (tm->work_first == NULL && !tm->stop) + pthread_cond_wait(&tm->work_cond, &tm->work_mutex); + if (tm->stop) + break; + work = tpool_work_get(tm); + tm->working_cnt++; + pthread_mutex_unlock(&tm->work_mutex); + + // printf("pthreads %p works\n", arg); + if (work != NULL) { + work->func(work->arg); + tpool_work_destroy(work); + } + // printf("pthreads %p waits\n", arg); + + pthread_mutex_lock(&tm->work_mutex); + tm->working_cnt--; + if (tm->working_cnt == 0 && tm->work_first == NULL) + pthread_cond_signal(&tm->working_cond); + pthread_mutex_unlock(&tm->work_mutex); + } + + // printf("pthreads %p stops\n", arg); + tm->thread_cnt--; + pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_signal(&tm->working_cond); + return NULL; +} + +static tpool_t *tpool_create(size_t num) +{ + tpool_t *tm; + pthread_t thread; + size_t i; + + if (num == 0) + num = 2; + + tm = calloc(1, sizeof(*tm)); + tm->thread_cnt = num; + + pthread_mutex_init(&tm->work_mutex, NULL); + pthread_cond_init(&tm->work_cond, NULL); + pthread_cond_init(&tm->working_cond, NULL); + + tm->work_first = NULL; + tm->work_last = NULL; + + for (i=0; iwork_mutex); + while 
(tm->working_cnt != 0 || tm->work_first != NULL) { + pthread_cond_wait(&tm->working_cond, &tm->work_mutex); + } + pthread_mutex_unlock(&tm->work_mutex); +} + +static void tpool_destroy(tpool_t *tm) +{ + tpool_work_t *work; + tpool_work_t *work2; + + if (tm == NULL) + return; + + pthread_mutex_lock(&tm->work_mutex); + work = tm->work_first; + while (work != NULL) { + work2 = work->next; + tpool_work_destroy(work); + work = work2; + } + tm->stop = true; + pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_broadcast(&tm->work_cond); + + tpool_wait(tm); + + pthread_mutex_lock(&tm->work_mutex); + while (tm->thread_cnt > 0) + pthread_cond_wait(&tm->working_cond, &tm->work_mutex); + pthread_mutex_unlock(&tm->work_mutex); + + pthread_mutex_destroy(&tm->work_mutex); + pthread_cond_destroy(&tm->work_cond); + pthread_cond_destroy(&tm->working_cond); + + free(tm); +} + +static bool tpool_add_work(tpool_t *tm, thread_func_t func, void *arg) +{ + tpool_work_t *work; + + if (tm == NULL) + return false; + + work = tpool_work_create(func, arg); + if (work == NULL) + return false; + + pthread_mutex_lock(&tm->work_mutex); + if (tm->work_first == NULL) { + tm->work_first = work; + tm->work_last = tm->work_first; + } else { + tm->work_last->next = work; + tm->work_last = work; + } + + pthread_cond_broadcast(&tm->work_cond); + pthread_mutex_unlock(&tm->work_mutex); + + return true; +} diff --git a/src/ggml.c b/src/ggml.c index 2974afca4..11faa09ef 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -75,7 +75,6 @@ static int sched_yield (void) { } #include "pthreads.h" - #else #include #include @@ -85,9 +84,10 @@ typedef void * thread_ret_t; #include #include #include - #endif +#include "tpool.h" + // __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512 #if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)) #ifndef __FMA__ From c163f27ad247658a178062c97badb1564a90085a Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 14:04:49 +0200 
Subject: [PATCH 4/9] Another trial --- include/ggml/tpool.h | 5 +++++ src/ggml.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/ggml/tpool.h b/include/ggml/tpool.h index a7cdf96db..7ec0d5420 100644 --- a/include/ggml/tpool.h +++ b/include/ggml/tpool.h @@ -1,6 +1,11 @@ #pragma once +#if defined(_WIN32) +#include #include "pthreads.h" +#else +#include +#endif typedef void (*thread_func_t)(void *arg); diff --git a/src/ggml.c b/src/ggml.c index 11faa09ef..f3960b9e6 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -75,7 +75,9 @@ static int sched_yield (void) { } #include "pthreads.h" + #else + #include #include @@ -84,6 +86,7 @@ typedef void * thread_ret_t; #include #include #include + #endif #include "tpool.h" From a427e10cf79c540bcdc216cfe39a3d3aa8eff335 Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 14:07:30 +0200 Subject: [PATCH 5/9] Rename header file --- include/ggml/{pthreads.h => pthreads-win32.h} | 0 include/ggml/tpool.h | 2 +- src/ggml.c | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename include/ggml/{pthreads.h => pthreads-win32.h} (100%) diff --git a/include/ggml/pthreads.h b/include/ggml/pthreads-win32.h similarity index 100% rename from include/ggml/pthreads.h rename to include/ggml/pthreads-win32.h diff --git a/include/ggml/tpool.h b/include/ggml/tpool.h index 7ec0d5420..20c21be73 100644 --- a/include/ggml/tpool.h +++ b/include/ggml/tpool.h @@ -2,7 +2,7 @@ #if defined(_WIN32) #include -#include "pthreads.h" +#include "pthreads-win32.h" #else #include #endif diff --git a/src/ggml.c b/src/ggml.c index f3960b9e6..dce4b5b3a 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -74,7 +74,7 @@ static int sched_yield (void) { return 0; } -#include "pthreads.h" +#include "pthreads-win32.h" #else From 85756a6e92c89c008c9c4545fb2be8f952cfbacc Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 14:11:56 +0200 Subject: [PATCH 6/9] Some more renaming required --- include/ggml/{pthreads-win32.h => pthread-win32.h} | 0 
include/ggml/tpool.h | 4 ++-- src/ggml.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename include/ggml/{pthreads-win32.h => pthread-win32.h} (100%) diff --git a/include/ggml/pthreads-win32.h b/include/ggml/pthread-win32.h similarity index 100% rename from include/ggml/pthreads-win32.h rename to include/ggml/pthread-win32.h diff --git a/include/ggml/tpool.h b/include/ggml/tpool.h index 20c21be73..68a656dc7 100644 --- a/include/ggml/tpool.h +++ b/include/ggml/tpool.h @@ -2,9 +2,9 @@ #if defined(_WIN32) #include -#include "pthreads-win32.h" +#include "pthread-win32.h" #else -#include +#include #endif typedef void (*thread_func_t)(void *arg); diff --git a/src/ggml.c b/src/ggml.c index dce4b5b3a..9a9d56a07 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -74,7 +74,7 @@ static int sched_yield (void) { return 0; } -#include "pthreads-win32.h" +#include "pthread-win32.h" #else From 4fa00ba7bd1b6a5d40db0310b413fa1280f98b98 Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 15:26:09 +0200 Subject: [PATCH 7/9] Uniform order for signal and unlock. 
--- .gitignore | 4 ++++ include/ggml/tpool.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c7a8f76b0..65996175a 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,7 @@ zig-out/ zig-cache/ *.dot + +.gitignore/ +build-clang/ +build-icx/ diff --git a/include/ggml/tpool.h b/include/ggml/tpool.h index 68a656dc7..2b95e978a 100644 --- a/include/ggml/tpool.h +++ b/include/ggml/tpool.h @@ -102,8 +102,8 @@ static void *tpool_worker(void *arg) // printf("pthreads %p stops\n", arg); tm->thread_cnt--; - pthread_mutex_unlock(&tm->work_mutex); pthread_cond_signal(&tm->working_cond); + pthread_mutex_unlock(&tm->work_mutex); return NULL; } @@ -162,8 +162,8 @@ static void tpool_destroy(tpool_t *tm) work = work2; } tm->stop = true; - pthread_mutex_unlock(&tm->work_mutex); pthread_cond_broadcast(&tm->work_cond); + pthread_mutex_unlock(&tm->work_mutex); tpool_wait(tm); From 3e852d64dfd9538e6de48617ee10faefba718b82 Mon Sep 17 00:00:00 2001 From: goerch Date: Thu, 20 Jul 2023 16:35:05 +0200 Subject: [PATCH 8/9] Fixing gitignore oops --- .gitignore | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitignore b/.gitignore index 65996175a..c7a8f76b0 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,3 @@ zig-out/ zig-cache/ *.dot - -.gitignore/ -build-clang/ -build-icx/ From ae8f0517f638e5ae0a9fa1901b4fbed5c6f621fd Mon Sep 17 00:00:00 2001 From: goerch Date: Sun, 23 Jul 2023 12:12:49 +0200 Subject: [PATCH 9/9] Reorder unlocking and signalling --- .gitignore | 4 ++++ include/ggml/tpool.h | 15 ++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index c7a8f76b0..65996175a 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,7 @@ zig-out/ zig-cache/ *.dot + +.gitignore/ +build-clang/ +build-icx/ diff --git a/include/ggml/tpool.h b/include/ggml/tpool.h index 2b95e978a..d1afcbb56 100644 --- a/include/ggml/tpool.h +++ b/include/ggml/tpool.h @@ -75,7 +75,6 @@ static void 
*tpool_worker(void *arg) tpool_t *tm = arg; tpool_work_t *work; - // printf("pthreads %p starts\n", arg); while (1) { pthread_mutex_lock(&tm->work_mutex); while (tm->work_first == NULL && !tm->stop) @@ -86,24 +85,22 @@ static void *tpool_worker(void *arg) tm->working_cnt++; pthread_mutex_unlock(&tm->work_mutex); - // printf("pthreads %p works\n", arg); if (work != NULL) { work->func(work->arg); tpool_work_destroy(work); } - // printf("pthreads %p waits\n", arg); pthread_mutex_lock(&tm->work_mutex); tm->working_cnt--; - if (tm->working_cnt == 0 && tm->work_first == NULL) - pthread_cond_signal(&tm->working_cond); + bool predicate = tm->working_cnt == 0 && tm->work_first == NULL; pthread_mutex_unlock(&tm->work_mutex); + if(predicate) + pthread_cond_signal(&tm->working_cond); } - // printf("pthreads %p stops\n", arg); tm->thread_cnt--; - pthread_cond_signal(&tm->working_cond); pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_signal(&tm->working_cond); return NULL; } @@ -162,8 +159,8 @@ static void tpool_destroy(tpool_t *tm) work = work2; } tm->stop = true; - pthread_cond_broadcast(&tm->work_cond); pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_broadcast(&tm->work_cond); tpool_wait(tm); @@ -199,8 +196,8 @@ static bool tpool_add_work(tpool_t *tm, thread_func_t func, void *arg) tm->work_last = work; } - pthread_cond_broadcast(&tm->work_cond); pthread_mutex_unlock(&tm->work_mutex); + pthread_cond_broadcast(&tm->work_cond); return true; }