@@ -16,7 +16,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
-#include <sched.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <float.h>
@@ -9243,21 +9242,6 @@ typedef pthread_t ggml_thread_t;
 // To rollback quickly, set `-DDISABLE_GGML_COMPUTE_SPIN_V2` to `CFLAGS` in Makefile.
 // TODO(mqy): cleanup feature flag DISABLE_GGML_COMPUTE_SPIN_V2.
 
-// Spin loop hint for some architectures.
-#ifndef DISABLE_GGML_COMPUTE_SPIN_V2
-
-#ifndef DISABLE_SPIN_HINT
-static inline void spin_hint(void) {
-#if defined(__x86_64__)
-    __asm__ __volatile__ ("pause");
-#elif defined(__aarch64__)
-    __asm__ __volatile__ ("wfe");
-#endif
-}
-#define NUM_SPIN_BEFOR_SCHED_YIELD 100
-#endif
-#endif
-
 struct ggml_compute_state_shared {
 #ifdef DISABLE_GGML_COMPUTE_SPIN_V2
     ggml_lock_t spin;
@@ -9295,9 +9279,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     const int n_threads = state->shared->n_threads;
 #endif
 
-#ifndef DISABLE_SPIN_HINT
-    int spin_counter = 0;
-#endif
     while (true) {
 #ifndef DISABLE_GGML_COMPUTE_SPIN_V2
         int flag = atomic_load(&state->shared->flag);
@@ -9310,14 +9291,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 atomic_fetch_sub(&state->shared->flag, 1); // done
             }
         }
-
-#ifndef DISABLE_SPIN_HINT
-        spin_hint();
-        if (++spin_counter > NUM_SPIN_BEFOR_SCHED_YIELD) {
-            spin_counter = 0;
-            sched_yield();
-        }
-#endif
 #else
         if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) {
             atomic_store(&state->shared->has_work, false);
@@ -9689,18 +9662,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         // wait for thread pool
         if (node->n_tasks > 1) {
 #ifndef DISABLE_GGML_COMPUTE_SPIN_V2
-#ifndef DISABLE_SPIN_HINT
-            int spin_count = 0;
-#endif
-            while (atomic_load(&state_shared.flag) != 0) {
-#ifndef DISABLE_SPIN_HINT
-                spin_hint();
-                if (++spin_count > NUM_SPIN_BEFOR_SCHED_YIELD) {
-                    spin_count = 0;
-                    sched_yield();
-                }
-#endif
-            }
+            while (atomic_load(&state_shared.flag) != 0) {}
 #else
             if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
                 atomic_store(&state_shared.has_work, false);
@@ -9767,18 +9729,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         // wait for thread pool
        if (node->n_tasks > 1) {
 #ifndef DISABLE_GGML_COMPUTE_SPIN_V2
-#ifndef DISABLE_SPIN_HINT
-            int spin_count = 0;
-#endif
-            while (atomic_load(&state_shared.flag) != 0) {
-#ifndef DISABLE_SPIN_HINT
-                spin_hint();
-                if (++spin_count > NUM_SPIN_BEFOR_SCHED_YIELD) {
-                    spin_count = 0;
-                    sched_yield();
-                }
-#endif
-            }
+            while (atomic_load(&state_shared.flag) != 0) {}
 #else
             if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
                 atomic_store(&state_shared.has_work, false);
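For context, the pattern this diff removes is a bounded spin-wait: each iteration issues a CPU-level busy-wait hint (`pause` on x86-64, `wfe` on AArch64), and after a fixed number of spins the thread calls `sched_yield()` to give up its timeslice. Below is a minimal standalone sketch of that technique, reconstructed from the deleted lines; the `spin_wait` wrapper and its `flag` parameter are illustrative and not part of ggml.c:

#include <sched.h>      // sched_yield()
#include <stdatomic.h>  // atomic_int, atomic_load

// Busy-wait hint for the current core (as in the removed spin_hint()).
static inline void spin_hint(void) {
#if defined(__x86_64__)
    __asm__ __volatile__ ("pause");
#elif defined(__aarch64__)
    __asm__ __volatile__ ("wfe");
#endif
}

#define NUM_SPIN_BEFOR_SCHED_YIELD 100  // identifier spelled as in the removed code

// Illustrative wrapper: spin until *flag drops to zero, yielding the CPU
// every NUM_SPIN_BEFOR_SCHED_YIELD iterations so other threads can run.
static void spin_wait(atomic_int * flag) {
    int spin_count = 0;
    while (atomic_load(flag) != 0) {
        spin_hint();
        if (++spin_count > NUM_SPIN_BEFOR_SCHED_YIELD) {
            spin_count = 0;
            sched_yield();
        }
    }
}

After this change, both wait sites fall back to a bare `while (atomic_load(&state_shared.flag) != 0) {}` busy loop, which is why the `#include <sched.h>` at the top of the file is dropped as well.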