@@ -149,9 +149,6 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
   return h;
 }
 
-#define ENTRY_LAST_INS 0
-#define ENTRY_LAST_MTXS 1
-
 static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                              kmp_dephash_t **hash,
                                              kmp_intptr_t addr) {
@@ -178,9 +175,9 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
 #endif
     entry->addr = addr;
     entry->last_out = NULL;
-    entry->last_ins = NULL;
-    entry->last_mtxs = NULL;
-    entry->last_flag = ENTRY_LAST_INS;
+    entry->last_set = NULL;
+    entry->prev_set = NULL;
+    entry->last_flag = 0;
     entry->mtx_lock = NULL;
     entry->next_in_bucket = h->buckets[bucket];
     h->buckets[bucket] = entry;
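For orientation between hunks: the entry now tracks at most two "waves" of non-OUT dependences instead of one list per dependence kind. A minimal sketch of the reworked layout, assuming simplified stand-in types (the real definition lives in kmp.h and uses the kmp_* typedefs):

```cpp
// Sketch only -- stand-in types, not the runtime's definitions.
struct DepNode;     // stands in for kmp_depnode_t
struct DepNodeList; // stands in for kmp_depnode_list_t
struct Lock;        // stands in for kmp_lock_t

struct DephashEntrySketch {
  long addr;               // dependence address this bucket entry tracks
  DepNode *last_out;       // last OUT/INOUT task on this address
  DepNodeList *last_set;   // current wave of IN/MTX/SET tasks
  DepNodeList *prev_set;   // previous wave, of a different dep kind
  unsigned char last_flag; // dep kind of last_set; 0 means no set yet
  Lock *mtx_lock;          // allocated lazily for MTX dependences
  DephashEntrySketch *next_in_bucket;
};
```

Because any OUT dependence flushes both lists, at most two waves can ever be outstanding, which is what lets IN, MTX, and the new SET kind share one code path below.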
@@ -313,96 +310,81 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
     kmp_dephash_entry_t *info =
         __kmp_dephash_find(thread, hash, dep->base_addr);
     kmp_depnode_t *last_out = info->last_out;
-    kmp_depnode_list_t *last_ins = info->last_ins;
-    kmp_depnode_list_t *last_mtxs = info->last_mtxs;
-
-    if (dep->flags.out) { // out --> clean lists of ins and mtxs if any
-      if (last_ins || last_mtxs) {
-        if (info->last_flag == ENTRY_LAST_INS) { // INS were last
-          npredecessors +=
-              __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
-        } else { // MTXS were last
-          npredecessors +=
-              __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
-        }
-        __kmp_depnode_list_free(thread, last_ins);
-        __kmp_depnode_list_free(thread, last_mtxs);
-        info->last_ins = NULL;
-        info->last_mtxs = NULL;
+    kmp_depnode_list_t *last_set = info->last_set;
+    kmp_depnode_list_t *prev_set = info->prev_set;
+
+    if (dep->flags.out) { // out or inout --> clean lists if any
+      if (last_set) {
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        __kmp_depnode_list_free(thread, last_set);
+        __kmp_depnode_list_free(thread, prev_set);
+        info->last_set = NULL;
+        info->prev_set = NULL;
+        info->last_flag = 0; // no sets in this dephash entry
       } else {
         npredecessors +=
             __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
       }
       __kmp_node_deref(thread, last_out);
-      if (dep_barrier) {
+      if (!dep_barrier) {
+        info->last_out = __kmp_node_ref(node);
+      } else {
         // if this is a sync point in the serial sequence, then the previous
         // outputs are guaranteed to be completed after the execution of this
         // task so the previous output nodes can be cleared.
         info->last_out = NULL;
-      } else {
-        info->last_out = __kmp_node_ref(node);
       }
-    } else if (dep->flags.in) {
-      // in --> link node to either last_out or last_mtxs, clean earlier deps
-      if (last_mtxs) {
-        npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
-        __kmp_node_deref(thread, last_out);
-        info->last_out = NULL;
-        if (info->last_flag == ENTRY_LAST_MTXS && last_ins) { // MTXS were last
-          // clean old INS before creating new list
-          __kmp_depnode_list_free(thread, last_ins);
-          info->last_ins = NULL;
-        }
-      } else {
+    } else { // either IN or MTX or SET
+      if (info->last_flag == 0 || info->last_flag == dep->flag) {
+        // last_set either didn't exist or of same dep kind
         // link node as successor of the last_out if any
         npredecessors +=
             __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
-      }
-      info->last_flag = ENTRY_LAST_INS;
-      info->last_ins = __kmp_add_node(thread, info->last_ins, node);
-    } else {
-      KMP_DEBUG_ASSERT(dep->flags.mtx == 1);
-      // mtx --> link node to either last_out or last_ins, clean earlier deps
-      if (last_ins) {
+        // link node as successor of all nodes in the prev_set if any
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
+      } else { // last_set is of different dep kind, make it prev_set
+        // link node as successor of all nodes in the last_set
         npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        // clean last_out if any
         __kmp_node_deref(thread, last_out);
         info->last_out = NULL;
-        if (info->last_flag == ENTRY_LAST_INS && last_mtxs) { // INS were last
-          // clean old MTXS before creating new list
-          __kmp_depnode_list_free(thread, last_mtxs);
-          info->last_mtxs = NULL;
-        }
-      } else {
-        // link node as successor of the last_out if any
-        npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
-      }
-      info->last_flag = ENTRY_LAST_MTXS;
-      info->last_mtxs = __kmp_add_node(thread, info->last_mtxs, node);
-      if (info->mtx_lock == NULL) {
-        info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
-        __kmp_init_lock(info->mtx_lock);
+        // clean prev_set if any
+        __kmp_depnode_list_free(thread, prev_set);
+        // move last_set to prev_set, new last_set will be allocated
+        info->prev_set = last_set;
+        info->last_set = NULL;
       }
-      KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
-      kmp_int32 m;
-      // Save lock in node's array
-      for (m = 0; m < MAX_MTX_DEPS; ++m) {
-        // sort pointers in decreasing order to avoid potential livelock
-        if (node->dn.mtx_locks[m] < info->mtx_lock) {
-          KMP_DEBUG_ASSERT(node->dn.mtx_locks[node->dn.mtx_num_locks] == NULL);
-          for (int n = node->dn.mtx_num_locks; n > m; --n) {
-            // shift right all lesser non-NULL pointers
-            KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
-            node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
+      info->last_flag = dep->flag; // store dep kind of the last_set
+      info->last_set = __kmp_add_node(thread, info->last_set, node);
+
+      // check if we are processing MTX dependency
+      if (dep->flag == KMP_DEP_MTX) {
+        if (info->mtx_lock == NULL) {
+          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
+          __kmp_init_lock(info->mtx_lock);
+        }
+        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
+        kmp_int32 m;
+        // Save lock in node's array
+        for (m = 0; m < MAX_MTX_DEPS; ++m) {
+          // sort pointers in decreasing order to avoid potential livelock
+          if (node->dn.mtx_locks[m] < info->mtx_lock) {
+            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
+            for (int n = node->dn.mtx_num_locks; n > m; --n) {
+              // shift right all lesser non-NULL pointers
+              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
+              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
+            }
+            node->dn.mtx_locks[m] = info->mtx_lock;
+            break;
           }
-          node->dn.mtx_locks[m] = info->mtx_lock;
-          break;
         }
+        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
+        node->dn.mtx_num_locks++;
       }
-      KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
-      node->dn.mtx_num_locks++;
     }
   }
   KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
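The rotation in this hunk is easiest to see in isolation. Below is a self-contained sketch, not the runtime's API: task ids and vectors stand in for kmp_depnode_t chains, and the dep-kind values are illustrative, not the real KMP_DEP_* encodings. Consecutive deps of one kind pool into last_set; a kind change retires the pool into prev_set.

```cpp
#include <cstdio>
#include <utility>
#include <vector>

enum DepKind { NONE = 0, IN, MTX, SET }; // illustrative values only

struct Entry {
  std::vector<int> last_set; // current wave of same-kind tasks
  std::vector<int> prev_set; // previous wave of a different kind
  DepKind last_flag = NONE;  // kind of last_set
};

// Mirrors the non-OUT branch of __kmp_process_deps above.
void add_in_like_dep(Entry &e, DepKind kind, int task) {
  if (e.last_flag != NONE && e.last_flag != kind) {
    // kind changed: tasks in last_set become predecessors of 'task',
    // and the wave rotates -- last_set retires into prev_set
    e.prev_set = std::move(e.last_set);
    e.last_set.clear();
  } // same kind: 'task' joins the wave; predecessors come from prev_set
  e.last_flag = kind;
  e.last_set.push_back(task);
}

int main() {
  Entry e;
  add_in_like_dep(e, IN, 1);  // wave {1}
  add_in_like_dep(e, IN, 2);  // wave {1, 2}: ins may run concurrently
  add_in_like_dep(e, SET, 3); // rotation: prev_set = {1, 2}, wave {3}
  std::printf("last_set=%zu prev_set=%zu\n", e.last_set.size(),
              e.prev_set.size());
  return 0;
}
```

Separately, the MTX path keeps node->dn.mtx_locks sorted by decreasing pointer value, so every mutexinoutset task acquires its locks in one global order; per the diff's own comment, this avoids a potential livelock.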
@@ -433,27 +415,25 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
   // TODO: Different algorithm for large dep_list ( > 10 ? )
   for (i = 0; i < ndeps; i++) {
     if (dep_list[i].base_addr != 0) {
+      KMP_DEBUG_ASSERT(
+          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
+          dep_list[i].flag == KMP_DEP_INOUT ||
+          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
       for (int j = i + 1; j < ndeps; j++) {
         if (dep_list[i].base_addr == dep_list[j].base_addr) {
-          dep_list[i].flags.in |= dep_list[j].flags.in;
-          dep_list[i].flags.out |=
-              (dep_list[j].flags.out ||
-               (dep_list[i].flags.in && dep_list[j].flags.mtx) ||
-               (dep_list[i].flags.mtx && dep_list[j].flags.in));
-          dep_list[i].flags.mtx =
-              dep_list[i].flags.mtx | dep_list[j].flags.mtx &&
-              !dep_list[i].flags.out;
+          if (dep_list[i].flag != dep_list[j].flag) {
+            // two different dependences on same address work identical to OUT
+            dep_list[i].flag = KMP_DEP_OUT;
+          }
           dep_list[j].base_addr = 0; // Mark j element as void
         }
       }
-      if (dep_list[i].flags.mtx) {
+      if (dep_list[i].flag == KMP_DEP_MTX) {
         // limit number of mtx deps to MAX_MTX_DEPS per node
         if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
           ++n_mtxs;
         } else {
-          dep_list[i].flags.in = 1; // downgrade mutexinoutset to inout
-          dep_list[i].flags.out = 1;
-          dep_list[i].flags.mtx = 0;
+          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
        }
      }
    }
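The merge rule in this hunk is blunt but safe: when one task names the same address under two different dependence kinds, the combined entry is treated as OUT. A standalone sketch of that rule, assuming illustrative flag values rather than the real KMP_DEP_* encodings from the compiler-facing dependence descriptor:

```cpp
#include <cstdio>

enum { DEP_IN = 1, DEP_OUT = 2, DEP_MTX = 4, DEP_SET = 8 }; // illustrative

struct Dep {
  long base_addr;
  int flag;
};

// Mirrors the duplicate-address loop in __kmp_check_deps above.
static void merge_duplicates(Dep *deps, int n) {
  for (int i = 0; i < n; i++) {
    if (deps[i].base_addr == 0)
      continue;
    for (int j = i + 1; j < n; j++) {
      if (deps[i].base_addr == deps[j].base_addr) {
        if (deps[i].flag != deps[j].flag)
          deps[i].flag = DEP_OUT; // differing kinds act like OUT
        deps[j].base_addr = 0;    // mark the duplicate as void
      }
    }
  }
}

int main() {
  Dep deps[2] = {{0x1000, DEP_IN}, {0x1000, DEP_MTX}};
  merge_duplicates(deps, 2);
  std::printf("flag=%d, duplicate voided=%d\n", deps[0].flag,
              deps[1].base_addr == 0);
  return 0;
}
```

Treating the pair as OUT serializes the task against every other dependence kind on that address, which is always correct even when stronger than strictly necessary.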
@@ -562,6 +542,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
         ompt_deps[i].dependence_type = ompt_dependence_type_in;
       else if (dep_list[i].flags.mtx)
         ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
+      else if (dep_list[i].flags.set)
+        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
     }
     for (i = 0; i < ndeps_noalias; i++) {
       ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
@@ -574,6 +556,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       else if (noalias_dep_list[i].flags.mtx)
         ompt_deps[ndeps + i].dependence_type =
             ompt_dependence_type_mutexinoutset;
+      else if (noalias_dep_list[i].flags.set)
+        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
     }
     ompt_callbacks.ompt_callback(ompt_callback_dependences)(
         &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
@@ -723,6 +707,8 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
       else if (dep_list[i].flags.mtx)
         ompt_deps[ndeps + i].dependence_type =
             ompt_dependence_type_mutexinoutset;
+      else if (dep_list[i].flags.set)
+        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
     }
     for (i = 0; i < ndeps_noalias; i++) {
       ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
@@ -735,6 +721,8 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
       else if (noalias_dep_list[i].flags.mtx)
         ompt_deps[ndeps + i].dependence_type =
             ompt_dependence_type_mutexinoutset;
+      else if (noalias_dep_list[i].flags.set)
+        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
     }
     ompt_callbacks.ompt_callback(ompt_callback_dependences)(
         taskwait_task_data, ompt_deps, ompt_ndeps);
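At the user level, the flags.set bit wired into OMPT in these hunks corresponds to the OpenMP 5.1 inoutset dependence type. A usage sketch (requires a compiler and runtime that implement inoutset): tasks in one inoutset set are mutually unordered but are ordered against in/out/inout tasks on the same list item.

```cpp
#include <cstdio>
#include <omp.h>

int main() {
  int x = 0, a = 0, b = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task depend(out : x)
    x = 1;
    // both inoutset tasks wait for the out task above, but the two of
    // them may run concurrently with each other (they form one "set")
#pragma omp task depend(inoutset : x)
    a = x + 1; // reads x, writes its own location
#pragma omp task depend(inoutset : x)
    b = x + 2;
#pragma omp task depend(in : x) // ordered after the whole inoutset wave
    std::printf("a = %d, b = %d\n", a, b);
  }
  return 0;
}
```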