Skip to content

Commit 610fea6

Browse files
[OpenMP] libomp: fixed implementation of OMP 5.1 inoutset task dependence type
Refactored the dependence-processing code and added the new inoutset dependence type. The compiler can now set the dependence flag to 0x8 when calling __kmpc_omp_task_with_deps. The dependence flags the library accepts so far, and their corresponding dependence types, are: 1 - IN, 2 - OUT, 3 - INOUT, 4 - MUTEXINOUTSET, 8 - INOUTSET. Differential Revision: https://reviews.llvm.org/D97085
1 parent 0ad051b commit 610fea6

File tree

7 files changed

+368
-116
lines changed

7 files changed

+368
-116
lines changed

openmp/runtime/src/kmp.h

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,15 +2252,24 @@ typedef union kmp_depnode kmp_depnode_t;
22522252
typedef struct kmp_depnode_list kmp_depnode_list_t;
22532253
typedef struct kmp_dephash_entry kmp_dephash_entry_t;
22542254

2255+
#define KMP_DEP_IN 0x1
2256+
#define KMP_DEP_OUT 0x2
2257+
#define KMP_DEP_INOUT 0x3
2258+
#define KMP_DEP_MTX 0x4
2259+
#define KMP_DEP_SET 0x8
22552260
// Compiler sends us this info:
22562261
typedef struct kmp_depend_info {
22572262
kmp_intptr_t base_addr;
22582263
size_t len;
2259-
struct {
2260-
bool in : 1;
2261-
bool out : 1;
2262-
bool mtx : 1;
2263-
} flags;
2264+
union {
2265+
kmp_uint8 flag;
2266+
struct {
2267+
unsigned in : 1;
2268+
unsigned out : 1;
2269+
unsigned mtx : 1;
2270+
unsigned set : 1;
2271+
} flags;
2272+
};
22642273
} kmp_depend_info_t;
22652274

22662275
// Internal structures to work with task dependencies:
@@ -2294,9 +2303,9 @@ union KMP_ALIGN_CACHE kmp_depnode {
22942303
struct kmp_dephash_entry {
22952304
kmp_intptr_t addr;
22962305
kmp_depnode_t *last_out;
2297-
kmp_depnode_list_t *last_ins;
2298-
kmp_depnode_list_t *last_mtxs;
2299-
kmp_int32 last_flag;
2306+
kmp_depnode_list_t *last_set;
2307+
kmp_depnode_list_t *prev_set;
2308+
kmp_uint8 last_flag;
23002309
kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
23012310
kmp_dephash_entry_t *next_in_bucket;
23022311
};

openmp/runtime/src/kmp_taskdeps.cpp

Lines changed: 76 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,6 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
149149
return h;
150150
}
151151

152-
#define ENTRY_LAST_INS 0
153-
#define ENTRY_LAST_MTXS 1
154-
155152
static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
156153
kmp_dephash_t **hash,
157154
kmp_intptr_t addr) {
@@ -178,9 +175,9 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
178175
#endif
179176
entry->addr = addr;
180177
entry->last_out = NULL;
181-
entry->last_ins = NULL;
182-
entry->last_mtxs = NULL;
183-
entry->last_flag = ENTRY_LAST_INS;
178+
entry->last_set = NULL;
179+
entry->prev_set = NULL;
180+
entry->last_flag = 0;
184181
entry->mtx_lock = NULL;
185182
entry->next_in_bucket = h->buckets[bucket];
186183
h->buckets[bucket] = entry;
@@ -313,96 +310,81 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
313310
kmp_dephash_entry_t *info =
314311
__kmp_dephash_find(thread, hash, dep->base_addr);
315312
kmp_depnode_t *last_out = info->last_out;
316-
kmp_depnode_list_t *last_ins = info->last_ins;
317-
kmp_depnode_list_t *last_mtxs = info->last_mtxs;
318-
319-
if (dep->flags.out) { // out --> clean lists of ins and mtxs if any
320-
if (last_ins || last_mtxs) {
321-
if (info->last_flag == ENTRY_LAST_INS) { // INS were last
322-
npredecessors +=
323-
__kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
324-
} else { // MTXS were last
325-
npredecessors +=
326-
__kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
327-
}
328-
__kmp_depnode_list_free(thread, last_ins);
329-
__kmp_depnode_list_free(thread, last_mtxs);
330-
info->last_ins = NULL;
331-
info->last_mtxs = NULL;
313+
kmp_depnode_list_t *last_set = info->last_set;
314+
kmp_depnode_list_t *prev_set = info->prev_set;
315+
316+
if (dep->flags.out) { // out or inout --> clean lists if any
317+
if (last_set) {
318+
npredecessors +=
319+
__kmp_depnode_link_successor(gtid, thread, task, node, last_set);
320+
__kmp_depnode_list_free(thread, last_set);
321+
__kmp_depnode_list_free(thread, prev_set);
322+
info->last_set = NULL;
323+
info->prev_set = NULL;
324+
info->last_flag = 0; // no sets in this dephash entry
332325
} else {
333326
npredecessors +=
334327
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
335328
}
336329
__kmp_node_deref(thread, last_out);
337-
if (dep_barrier) {
330+
if (!dep_barrier) {
331+
info->last_out = __kmp_node_ref(node);
332+
} else {
338333
// if this is a sync point in the serial sequence, then the previous
339334
// outputs are guaranteed to be completed after the execution of this
340335
// task so the previous output nodes can be cleared.
341336
info->last_out = NULL;
342-
} else {
343-
info->last_out = __kmp_node_ref(node);
344337
}
345-
} else if (dep->flags.in) {
346-
// in --> link node to either last_out or last_mtxs, clean earlier deps
347-
if (last_mtxs) {
348-
npredecessors +=
349-
__kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
350-
__kmp_node_deref(thread, last_out);
351-
info->last_out = NULL;
352-
if (info->last_flag == ENTRY_LAST_MTXS && last_ins) { // MTXS were last
353-
// clean old INS before creating new list
354-
__kmp_depnode_list_free(thread, last_ins);
355-
info->last_ins = NULL;
356-
}
357-
} else {
338+
} else { // either IN or MTX or SET
339+
if (info->last_flag == 0 || info->last_flag == dep->flag) {
340+
// last_set either didn't exist or is of the same dep kind
358341
// link node as successor of the last_out if any
359342
npredecessors +=
360343
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
361-
}
362-
info->last_flag = ENTRY_LAST_INS;
363-
info->last_ins = __kmp_add_node(thread, info->last_ins, node);
364-
} else {
365-
KMP_DEBUG_ASSERT(dep->flags.mtx == 1);
366-
// mtx --> link node to either last_out or last_ins, clean earlier deps
367-
if (last_ins) {
344+
// link node as successor of all nodes in the prev_set if any
345+
npredecessors +=
346+
__kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
347+
} else { // last_set is of different dep kind, make it prev_set
348+
// link node as successor of all nodes in the last_set
368349
npredecessors +=
369-
__kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
350+
__kmp_depnode_link_successor(gtid, thread, task, node, last_set);
351+
// clean last_out if any
370352
__kmp_node_deref(thread, last_out);
371353
info->last_out = NULL;
372-
if (info->last_flag == ENTRY_LAST_INS && last_mtxs) { // INS were last
373-
// clean old MTXS before creating new list
374-
__kmp_depnode_list_free(thread, last_mtxs);
375-
info->last_mtxs = NULL;
376-
}
377-
} else {
378-
// link node as successor of the last_out if any
379-
npredecessors +=
380-
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
381-
}
382-
info->last_flag = ENTRY_LAST_MTXS;
383-
info->last_mtxs = __kmp_add_node(thread, info->last_mtxs, node);
384-
if (info->mtx_lock == NULL) {
385-
info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
386-
__kmp_init_lock(info->mtx_lock);
354+
// clean prev_set if any
355+
__kmp_depnode_list_free(thread, prev_set);
356+
// move last_set to prev_set, new last_set will be allocated
357+
info->prev_set = last_set;
358+
info->last_set = NULL;
387359
}
388-
KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
389-
kmp_int32 m;
390-
// Save lock in node's array
391-
for (m = 0; m < MAX_MTX_DEPS; ++m) {
392-
// sort pointers in decreasing order to avoid potential livelock
393-
if (node->dn.mtx_locks[m] < info->mtx_lock) {
394-
KMP_DEBUG_ASSERT(node->dn.mtx_locks[node->dn.mtx_num_locks] == NULL);
395-
for (int n = node->dn.mtx_num_locks; n > m; --n) {
396-
// shift right all lesser non-NULL pointers
397-
KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
398-
node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
360+
info->last_flag = dep->flag; // store dep kind of the last_set
361+
info->last_set = __kmp_add_node(thread, info->last_set, node);
362+
363+
// check if we are processing MTX dependency
364+
if (dep->flag == KMP_DEP_MTX) {
365+
if (info->mtx_lock == NULL) {
366+
info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
367+
__kmp_init_lock(info->mtx_lock);
368+
}
369+
KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
370+
kmp_int32 m;
371+
// Save lock in node's array
372+
for (m = 0; m < MAX_MTX_DEPS; ++m) {
373+
// sort pointers in decreasing order to avoid potential livelock
374+
if (node->dn.mtx_locks[m] < info->mtx_lock) {
375+
KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
376+
for (int n = node->dn.mtx_num_locks; n > m; --n) {
377+
// shift right all lesser non-NULL pointers
378+
KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
379+
node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
380+
}
381+
node->dn.mtx_locks[m] = info->mtx_lock;
382+
break;
399383
}
400-
node->dn.mtx_locks[m] = info->mtx_lock;
401-
break;
402384
}
385+
KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
386+
node->dn.mtx_num_locks++;
403387
}
404-
KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
405-
node->dn.mtx_num_locks++;
406388
}
407389
}
408390
KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
@@ -433,27 +415,25 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
433415
// TODO: Different algorithm for large dep_list ( > 10 ? )
434416
for (i = 0; i < ndeps; i++) {
435417
if (dep_list[i].base_addr != 0) {
418+
KMP_DEBUG_ASSERT(
419+
dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
420+
dep_list[i].flag == KMP_DEP_INOUT ||
421+
dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
436422
for (int j = i + 1; j < ndeps; j++) {
437423
if (dep_list[i].base_addr == dep_list[j].base_addr) {
438-
dep_list[i].flags.in |= dep_list[j].flags.in;
439-
dep_list[i].flags.out |=
440-
(dep_list[j].flags.out ||
441-
(dep_list[i].flags.in && dep_list[j].flags.mtx) ||
442-
(dep_list[i].flags.mtx && dep_list[j].flags.in));
443-
dep_list[i].flags.mtx =
444-
dep_list[i].flags.mtx | dep_list[j].flags.mtx &&
445-
!dep_list[i].flags.out;
424+
if (dep_list[i].flag != dep_list[j].flag) {
425+
// two different dependences on the same address work identically to OUT
426+
dep_list[i].flag = KMP_DEP_OUT;
427+
}
446428
dep_list[j].base_addr = 0; // Mark j element as void
447429
}
448430
}
449-
if (dep_list[i].flags.mtx) {
431+
if (dep_list[i].flag == KMP_DEP_MTX) {
450432
// limit number of mtx deps to MAX_MTX_DEPS per node
451433
if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
452434
++n_mtxs;
453435
} else {
454-
dep_list[i].flags.in = 1; // downgrade mutexinoutset to inout
455-
dep_list[i].flags.out = 1;
456-
dep_list[i].flags.mtx = 0;
436+
dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
457437
}
458438
}
459439
}
@@ -562,6 +542,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
562542
ompt_deps[i].dependence_type = ompt_dependence_type_in;
563543
else if (dep_list[i].flags.mtx)
564544
ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
545+
else if (dep_list[i].flags.set)
546+
ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
565547
}
566548
for (i = 0; i < ndeps_noalias; i++) {
567549
ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
@@ -574,6 +556,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
574556
else if (noalias_dep_list[i].flags.mtx)
575557
ompt_deps[ndeps + i].dependence_type =
576558
ompt_dependence_type_mutexinoutset;
559+
else if (noalias_dep_list[i].flags.set)
560+
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
577561
}
578562
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
579563
&(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
@@ -723,6 +707,8 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
723707
else if (dep_list[i].flags.mtx)
724708
ompt_deps[ndeps + i].dependence_type =
725709
ompt_dependence_type_mutexinoutset;
710+
else if (dep_list[i].flags.set)
711+
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
726712
}
727713
for (i = 0; i < ndeps_noalias; i++) {
728714
ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
@@ -735,6 +721,8 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
735721
else if (noalias_dep_list[i].flags.mtx)
736722
ompt_deps[ndeps + i].dependence_type =
737723
ompt_dependence_type_mutexinoutset;
724+
else if (noalias_dep_list[i].flags.set)
725+
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
738726
}
739727
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
740728
taskwait_task_data, ompt_deps, ompt_ndeps);

openmp/runtime/src/kmp_taskdeps.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ static inline void __kmp_dephash_free_entries(kmp_info_t *thread,
5858
kmp_dephash_entry_t *next;
5959
for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
6060
next = entry->next_in_bucket;
61-
__kmp_depnode_list_free(thread, entry->last_ins);
62-
__kmp_depnode_list_free(thread, entry->last_mtxs);
61+
__kmp_depnode_list_free(thread, entry->last_set);
62+
__kmp_depnode_list_free(thread, entry->prev_set);
6363
__kmp_node_deref(thread, entry->last_out);
6464
if (entry->mtx_lock) {
6565
__kmp_destroy_lock(entry->mtx_lock);

openmp/runtime/test/tasking/hidden_helper_task/common.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@ using kmp_intptr_t = intptr_t;
1313
typedef struct kmp_depend_info {
1414
kmp_intptr_t base_addr;
1515
size_t len;
16-
struct {
17-
bool in : 1;
18-
bool out : 1;
19-
bool mtx : 1;
20-
} flags;
16+
union {
17+
unsigned char flag;
18+
struct {
19+
bool in : 1;
20+
bool out : 1;
21+
bool mtx : 1;
22+
} flags;
23+
};
2124
} kmp_depend_info_t;
2225

2326
typedef union kmp_cmplrdata {

openmp/runtime/test/tasking/hidden_helper_task/depend.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ int main(int argc, char *argv[]) {
6565

6666
kmp_depend_info_t depinfo1;
6767
depinfo1.base_addr = reinterpret_cast<intptr_t>(&data);
68-
depinfo1.flags.out = 1;
68+
depinfo1.flag = 2; // OUT
6969
depinfo1.len = 4;
7070

7171
__kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -80,8 +80,7 @@ int main(int argc, char *argv[]) {
8080

8181
kmp_depend_info_t depinfo2;
8282
depinfo2.base_addr = reinterpret_cast<intptr_t>(&data);
83-
depinfo2.flags.in = 1;
84-
depinfo2.flags.out = 1;
83+
depinfo2.flag = 3; // INOUT
8584
depinfo2.len = 4;
8685

8786
__kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -96,8 +95,7 @@ int main(int argc, char *argv[]) {
9695

9796
kmp_depend_info_t depinfo3;
9897
depinfo3.base_addr = reinterpret_cast<intptr_t>(&data);
99-
depinfo3.flags.in = 1;
100-
depinfo3.flags.out = 1;
98+
depinfo3.flag = 3; // INOUT
10199
depinfo3.len = 4;
102100

103101
__kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);
@@ -112,8 +110,7 @@ int main(int argc, char *argv[]) {
112110

113111
kmp_depend_info_t depinfo4;
114112
depinfo4.base_addr = reinterpret_cast<intptr_t>(&data);
115-
depinfo4.flags.in = 1;
116-
depinfo4.flags.out = 1;
113+
depinfo4.flag = 3; // INOUT
117114
depinfo4.len = 4;
118115

119116
__kmpc_omp_task_with_deps(nullptr, gtid, task4, 1, &depinfo4, 0, nullptr);

openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ int main(int argc, char *argv[]) {
8181

8282
kmp_depend_info_t depinfo1;
8383
depinfo1.base_addr = reinterpret_cast<intptr_t>(&depvar);
84-
depinfo1.flags.in = 1;
85-
depinfo1.flags.out = 1;
84+
depinfo1.flag = 3; // INOUT
8685
depinfo1.len = 4;
8786

8887
__kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -96,8 +95,7 @@ int main(int argc, char *argv[]) {
9695

9796
kmp_depend_info_t depinfo2;
9897
depinfo2.base_addr = reinterpret_cast<intptr_t>(&depvar);
99-
depinfo2.flags.in = 1;
100-
depinfo2.flags.out = 1;
98+
depinfo2.flag = 3; // INOUT
10199
depinfo2.len = 4;
102100

103101
__kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -111,8 +109,7 @@ int main(int argc, char *argv[]) {
111109

112110
kmp_depend_info_t depinfo3;
113111
depinfo3.base_addr = reinterpret_cast<intptr_t>(&depvar);
114-
depinfo3.flags.in = 1;
115-
depinfo3.flags.out = 1;
112+
depinfo3.flag = 3; // INOUT
116113
depinfo3.len = 4;
117114

118115
__kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);

0 commit comments

Comments
 (0)