Skip to content

Commit 4f0a333

Browse files
[Profile] fix threading issue (#55704)
I forgot about the existence of threads, so I had hard-coded this to support only one thread. Clearly that is not sufficient, so use the semaphore here as it is intended to be used.

Fixes #55703

---------

Co-authored-by: Ian Butterworth <[email protected]>
1 parent 8cae8d1 commit 4f0a333

File tree

2 files changed

+12
-15
lines changed

2 files changed

+12
-15
lines changed

src/signals-unix.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ pthread_mutex_t in_signal_lock; // shared with jl_delete_thread
426426
static bt_context_t *signal_context; // protected by in_signal_lock
427427
static int exit_signal_cond = -1;
428428
static int signal_caught_cond = -1;
429+
static int signals_inflight = 0;
429430

430431
int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
431432
{
@@ -438,7 +439,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
438439
pthread_mutex_unlock(&in_signal_lock);
439440
return 0;
440441
}
441-
if (jl_atomic_load(&ptls2->signal_request) != 0) {
442+
while (signals_inflight) {
442443
// something is wrong, or there is already a usr2 in flight elsewhere
443444
// try to wait for it to finish or wait for timeout
444445
struct pollfd event = {signal_caught_cond, POLLIN, 0};
@@ -450,25 +451,16 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
450451
pthread_mutex_unlock(&in_signal_lock);
451452
return 0;
452453
}
453-
}
454-
// check for any stale signal_caught_cond events
455-
struct pollfd event = {signal_caught_cond, POLLIN, 0};
456-
do {
457-
err = poll(&event, 1, 0);
458-
} while (err == -1 && errno == EINTR);
459-
if (err == -1) {
460-
pthread_mutex_unlock(&in_signal_lock);
461-
return 0;
462-
}
463-
if ((event.revents & POLLIN) != 0) {
464454
// consume it before continuing
465455
eventfd_t got;
466456
do {
467457
err = read(signal_caught_cond, &got, sizeof(eventfd_t));
468458
} while (err == -1 && errno == EINTR);
469459
if (err != sizeof(eventfd_t)) abort();
470-
assert(got == 1); (void) got;
460+
assert(signals_inflight >= got);
461+
signals_inflight -= got;
471462
}
463+
signals_inflight++;
472464
sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1);
473465
assert(request == 0 || request == -1);
474466
request = 1;
@@ -485,6 +477,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
485477
if (err == -1) {
486478
// not ready after timeout: try to cancel this request
487479
if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
480+
signals_inflight--;
488481
pthread_mutex_unlock(&in_signal_lock);
489482
return 0;
490483
}
@@ -494,7 +487,9 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
494487
err = read(signal_caught_cond, &got, sizeof(eventfd_t));
495488
} while (err == -1 && errno == EINTR);
496489
if (err != sizeof(eventfd_t)) abort();
497-
assert(got == 1); (void) got;
490+
assert(signals_inflight >= got);
491+
signals_inflight -= got;
492+
signals_inflight++;
498493
// Now the other thread is waiting on exit_signal_cond (verify that here by
499494
// checking it is 0, and add an acquire barrier for good measure)
500495
request = jl_atomic_load_acquire(&ptls2->signal_request);
@@ -521,6 +516,7 @@ static void jl_try_deliver_sigint(void)
521516
jl_safepoint_enable_sigint();
522517
jl_wake_libuv();
523518
pthread_mutex_lock(&in_signal_lock);
519+
signals_inflight++;
524520
jl_atomic_store_release(&ptls2->signal_request, 2);
525521
// This also makes sure `sleep` is aborted.
526522
pthread_kill(ptls2->system_id, SIGUSR2);

stdlib/Profile/test/runtests.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ let cmd = Base.julia_cmd()
168168
println("done")
169169
print(Profile.len_data())
170170
"""
171-
p = open(`$cmd -e $script`)
171+
# use multiple threads here to ensure that profiling works with threading
172+
p = open(`$cmd -t2 -e $script`)
172173
t = Timer(120) do t
173174
# should be under 10 seconds, so give it 2 minutes then report failure
174175
println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n")

0 commit comments

Comments
 (0)