Skip to content

[OpenMP] Use new OMPT state and sync kinds for barrier events #95602

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions openmp/runtime/src/include/omp-tools.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
/* implicit barrier at the end of worksharing */ \
macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
macro (ompt_state_wait_barrier_implementation, 0x015) /* implementation barrier */ \
macro (ompt_state_wait_barrier_teams, 0x016) /* teams barrier */ \
\
/* task wait states (32..63) */ \
macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
Expand Down
47 changes: 36 additions & 11 deletions openmp/runtime/src/kmp_barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1805,7 +1805,25 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
// It is OK to report the barrier state after the barrier begin callback.
// According to the OMPT specification, a compliant implementation may
// even delay reporting this state until the barrier begins to wait.
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
auto *ompt_thr_info = &this_thr->th.ompt_thread_info;
switch (barrier_kind) {
case ompt_sync_region_barrier_explicit:
ompt_thr_info->state = ompt_state_wait_barrier_explicit;
break;
case ompt_sync_region_barrier_implicit_workshare:
ompt_thr_info->state = ompt_state_wait_barrier_implicit_workshare;
break;
case ompt_sync_region_barrier_implicit_parallel:
ompt_thr_info->state = ompt_state_wait_barrier_implicit_parallel;
break;
case ompt_sync_region_barrier_teams:
ompt_thr_info->state = ompt_state_wait_barrier_teams;
break;
case ompt_sync_region_barrier_implementation:
[[fallthrough]];
default:
ompt_thr_info->state = ompt_state_wait_barrier_implementation;
}
}
#endif

Expand Down Expand Up @@ -2213,20 +2231,24 @@ void __kmp_join_barrier(int gtid) {
codeptr = team->t.ompt_team_info.master_return_address;
my_task_data = OMPT_CUR_TASK_DATA(this_thr);
my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
ompt_state_t ompt_state = ompt_state_wait_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) {
sync_kind = ompt_sync_region_barrier_teams;
ompt_state = ompt_state_wait_barrier_teams;
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
my_task_data, codeptr);
sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
my_task_data, codeptr);
sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
}
if (!KMP_MASTER_TID(ds_tid))
this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
#endif
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
this_thr->th.ompt_thread_info.state = ompt_state;
}
#endif

Expand Down Expand Up @@ -2488,8 +2510,10 @@ void __kmp_fork_barrier(int gtid, int tid) {
}

#if OMPT_SUPPORT
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
(ompt_state == ompt_state_wait_barrier_teams ||
ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = (team)
? OMPT_CUR_TASK_DATA(this_thr)
Expand All @@ -2501,15 +2525,16 @@ void __kmp_fork_barrier(int gtid, int tid) {
(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
Expand Down
17 changes: 10 additions & 7 deletions openmp/runtime/src/kmp_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7745,7 +7745,7 @@ int __kmp_invoke_task_func(int gtid) {
);
#if OMPT_SUPPORT
*exit_frame_p = NULL;
this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
Expand Down Expand Up @@ -7843,7 +7843,7 @@ int __kmp_invoke_teams_master(int gtid) {
#endif
__kmp_teams_master(gtid);
#if OMPT_SUPPORT
this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
#endif
__kmp_run_after_invoked_task(gtid, 0, this_thr, team);
return 1;
Expand Down Expand Up @@ -8126,8 +8126,10 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {

__kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
(ompt_state == ompt_state_wait_barrier_teams ||
ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Expand All @@ -8138,15 +8140,16 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
Expand Down
16 changes: 10 additions & 6 deletions openmp/runtime/src/kmp_wait_release.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,19 +323,21 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
ompt_state_t ompt_state,
ompt_data_t *tId) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
if (ompt_state == ompt_state_wait_barrier_implicit) {
if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
ompt_state == ompt_state_wait_barrier_teams) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
codeptr);
sync_kind, ompt_scope_end, NULL, tId, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
codeptr);
sync_kind, ompt_scope_end, NULL, tId, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid)) {
Expand Down Expand Up @@ -455,7 +457,9 @@ final_spin=FALSE)
ompt_data_t *tId;
if (ompt_enabled.enabled) {
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
if (!final_spin ||
(ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
ompt_entry_state != ompt_state_wait_barrier_teams) ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team = NULL;
if (this_thr->th.th_team)
Expand Down
15 changes: 8 additions & 7 deletions openmp/runtime/src/ompt-specific.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,22 +503,23 @@ static uint64_t __ompt_get_unique_id_internal() {

// Map a runtime barrier type to the OMPT sync-region kind reported to tools.
//
// bt  - runtime barrier type of the barrier being executed.
// thr - thread executing the barrier; its OMPT parallel flags and its
//       th_ident source-location flags are read, nothing is written.
//
// Returns one of the specific barrier kinds (teams, implicit_parallel,
// explicit, implicit_workshare) or ompt_sync_region_barrier_implementation
// when the barrier cannot be attributed to a user-visible construct. The
// generic ompt_sync_region_barrier / ompt_sync_region_barrier_implicit kinds
// are never returned, so every result has a dedicated case in callers that
// switch over the kind.
ompt_sync_region_t __ompt_get_barrier_kind(enum barrier_type bt,
                                           kmp_info_t *thr) {
  // Fork/join barrier: a thread flagged as part of a league reports a teams
  // barrier, any other thread the implicit barrier of a parallel region.
  if (bt == bs_forkjoin_barrier) {
    if (thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      return ompt_sync_region_barrier_teams;
    return ompt_sync_region_barrier_implicit_parallel;
  }

  // Anything that is not a plain barrier, or a plain barrier without source
  // location information to classify it, is an implementation barrier.
  if (bt != bs_plain_barrier || !thr->th.th_ident)
    return ompt_sync_region_barrier_implementation;

  kmp_int32 flags = thr->th.th_ident->flags;

  if ((flags & KMP_IDENT_BARRIER_EXPL) != 0)
    return ompt_sync_region_barrier_explicit;

  if ((flags & KMP_IDENT_BARRIER_IMPL) != 0)
    return ompt_sync_region_barrier_implicit_workshare;

  // Plain barrier carrying neither the explicit nor the implicit ident flag.
  return ompt_sync_region_barrier_implementation;
}
Loading
Loading