Skip to content

[OpenMP][OMPT] Indicate loop schedule for worksharing-loop events #97429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions openmp/runtime/src/kmp_csupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2006,13 +2006,13 @@ void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {

#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_work_t ompt_work_type = ompt_work_loop;
ompt_work_t ompt_work_type = ompt_work_loop_static;
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
// Determine workshare type
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop;
ompt_work_type = ompt_work_loop_static;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
Expand Down
9 changes: 5 additions & 4 deletions openmp/runtime/src/kmp_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,8 +1164,9 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
ompt_get_work_schedule(pr->schedule), ompt_scope_begin,
&(team_info->parallel_data), &(task_info->task_data), pr->u.p.tc,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
Expand Down Expand Up @@ -2121,8 +2122,8 @@ int __kmp_dispatch_next_algorithm(int gtid,
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
ompt_callbacks.ompt_callback(ompt_callback_work)( \
ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
&(task_info->task_data), 0, codeptr); \
ompt_get_work_schedule(pr->schedule), ompt_scope_end, \
&(team_info->parallel_data), &(task_info->task_data), 0, codeptr); \
} \
}
#define OMPT_LOOP_DISPATCH(lb, ub, st, status) \
Expand Down
4 changes: 2 additions & 2 deletions openmp/runtime/src/kmp_sched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_team_info_t *team_info = NULL;
ompt_task_info_t *task_info = NULL;
ompt_work_t ompt_work_type = ompt_work_loop;
ompt_work_t ompt_work_type = ompt_work_loop_static;

static kmp_int8 warn = 0;

Expand All @@ -114,7 +114,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
// Determine workshare type
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop;
ompt_work_type = ompt_work_loop_static;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
Expand Down
19 changes: 19 additions & 0 deletions openmp/runtime/src/ompt-specific.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,25 @@ inline const char *ompt_get_runtime_version() {
return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
}

inline ompt_work_t ompt_get_work_schedule(enum sched_type schedule) {
switch (SCHEDULE_WITHOUT_MODIFIERS(schedule)) {
case kmp_sch_static_chunked:
case kmp_sch_static_balanced:
case kmp_sch_static_greedy:
return ompt_work_loop_static;
case kmp_sch_dynamic_chunked:
case kmp_sch_static_steal:
return ompt_work_loop_dynamic;
case kmp_sch_guided_iterative_chunked:
case kmp_sch_guided_analytical_chunked:
case kmp_sch_guided_chunked:
case kmp_sch_guided_simd:
return ompt_work_loop_guided;
default:
return ompt_work_loop_other;
}
}

class OmptReturnAddressGuard {
private:
bool SetAddress{false};
Expand Down
183 changes: 45 additions & 138 deletions openmp/runtime/test/ompt/callback.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,36 @@ static const char* ompt_cancel_flag_t_values[] = {
"ompt_cancel_discarded_task"
};

static const char *ompt_work_t_values[] = {"undefined",
"ompt_work_loop",
"ompt_work_sections",
"ompt_work_single_executor",
"ompt_work_single_other",
"ompt_work_workshare",
"ompt_work_distribute",
"ompt_work_taskloop",
"ompt_work_scope",
"ompt_work_workdistribute",
"ompt_work_loop_static",
"ompt_work_loop_dynamic",
"ompt_work_loop_guided",
"ompt_work_loop_other"};

static const char *ompt_work_events_t_values[] = {"undefined",
"ompt_event_loop",
"ompt_event_sections",
"ompt_event_single_in_block",
"ompt_event_single_others",
"ompt_event_workshare",
"ompt_event_distribute",
"ompt_event_taskloop",
"ompt_event_scope",
"ompt_event_workdistribute",
"ompt_event_loop_static",
"ompt_event_loop_dynamic",
"ompt_event_loop_guided",
"ompt_event_loop_other"};

static const char *ompt_dependence_type_t_values[36] = {
"ompt_dependence_type_UNDEFINED",
"ompt_dependence_type_in", // 1
Expand Down Expand Up @@ -852,144 +882,21 @@ on_ompt_callback_work(
{
switch(endpoint)
{
case ompt_scope_begin:
switch(wstype)
{
case ompt_work_loop:
case ompt_work_loop_static:
case ompt_work_loop_dynamic:
case ompt_work_loop_guided:
case ompt_work_loop_other:
// TODO: add schedule attribute for the different work_loop types.
// e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_loop_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_sections:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_sections_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_executor:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_in_block_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_others_begin: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_workshare:
//impl
break;
case ompt_work_distribute:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_distribute_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_taskloop:
//impl
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_taskloop_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_scope:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_scope_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
}
break;
case ompt_scope_end:
switch(wstype)
{
case ompt_work_loop:
case ompt_work_loop_static:
case ompt_work_loop_dynamic:
case ompt_work_loop_guided:
case ompt_work_loop_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_loop_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_sections:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_sections_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_executor:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_in_block_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_others_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_workshare:
//impl
break;
case ompt_work_distribute:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_distribute_end: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_taskloop:
//impl
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_taskloop_end: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_scope:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_scope_end: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
}
break;
case ompt_scope_beginend:
printf("ompt_scope_beginend should never be passed to %s\n", __func__);
exit(-1);
case ompt_scope_begin:
printf("%" PRIu64 ":" _TOOL_PREFIX " %s_begin: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, ompt_work_events_t_values[wstype],
parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_scope_end:
printf("%" PRIu64 ":" _TOOL_PREFIX " %s_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, ompt_work_events_t_values[wstype],
parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_scope_beginend:
printf("ompt_scope_beginend should never be passed to %s\n", __func__);
exit(-1);
}
}

Expand Down
10 changes: 5 additions & 5 deletions openmp/runtime/test/ompt/synchronization/ordered_dependences.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
int main() {
int a[10][10];
#pragma omp parallel num_threads(2)
#pragma omp for ordered(2)
#pragma omp for ordered(2) schedule(static)
for (int i = 0; i < 2; i++)
for (int j = 0; j < 2; j++) {
a[i][j] = i + j + 1;
Expand All @@ -23,8 +23,8 @@ int main() {
}
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]

// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[ITASK:[0-9]+]],
// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_static_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[ITASK:[0-9]+]],

// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[ITASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (0,
Expand All @@ -38,8 +38,8 @@ int main() {
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (1,
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2

// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[ITASK:[0-9]+]],
// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_static_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[ITASK:[0-9]+]],

// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[ITASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0,
Expand Down
4 changes: 2 additions & 2 deletions openmp/runtime/test/ompt/tasks/taskloop.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int main() {
// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]]
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
Expand All @@ -52,7 +52,7 @@ int main() {
// CHECK-NOT: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]]
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: count=2
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin:
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end:
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/test/ompt/tasks/taskloop_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int main() {

// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]]
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=16

// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
Expand Down
8 changes: 4 additions & 4 deletions openmp/runtime/test/ompt/teams/distribute_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,28 @@ int main() {

// CHECK: {{^}}[[THREAD_ID0:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID0:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID0:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID0:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID0]], task_id=[[TASK_ID0]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16

// CHECK: {{^}}[[THREAD_ID1:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID1:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID1:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID1:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID1]], task_id=[[TASK_ID1]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16

// CHECK: {{^}}[[THREAD_ID2:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID2:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID2:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID2:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID2]], task_id=[[TASK_ID2]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16

// CHECK: {{^}}[[THREAD_ID3:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID3:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID3:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID3:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID3]], task_id=[[TASK_ID3]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16
3 changes: 3 additions & 0 deletions openmp/runtime/test/ompt/worksharing/for/auto.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
// XFAIL: gcc

#define SCHEDULE auto
// The runtime uses guided schedule for auto,
// which is a reason choice
#define SCHED_OUTPUT "guided"
#include "base.h"
3 changes: 3 additions & 0 deletions openmp/runtime/test/ompt/worksharing/for/auto_serialized.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
// XFAIL: gcc

#define SCHEDULE auto
// The runtime uses static schedule for serialized loop,
// which is a reason choice
#define SCHED_OUTPUT "static"
#include "base_serialized.h"
3 changes: 3 additions & 0 deletions openmp/runtime/test/ompt/worksharing/for/auto_split.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@
// XFAIL: gcc

#define SCHEDULE auto
// The runtime uses guided schedule for auto,
// which is a reason choice
#define SCHED_OUTPUT "guided"
#include "base_split.h"
Loading
Loading