Skip to content

Commit ced245d

Browse files
authored
Merge pull request #2590 from jjhursey/topic/osc-pt2pt-1-thread-fixes
Topic/osc pt2pt 1 thread fixes
2 parents d8c1a3d + eec1d5b commit ced245d

File tree

3 files changed

+39
-4
lines changed

3 files changed

+39
-4
lines changed

ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* All rights reserved.
1111
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
1212
* reserved.
13-
* Copyright (c) 2010 IBM Corporation. All rights reserved.
13+
* Copyright (c) 2010-2016 IBM Corporation. All rights reserved.
1414
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
1515
* Copyright (c) 2015 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
@@ -227,6 +227,12 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
227227
/* haven't processed any post messages yet */
228228
sync->sync_expected = sync->num_peers;
229229

230+
/* If the previous epoch was from Fence, then eager_send_active is still
231+
* set to true at this time, but it shouldn't be true until we get our
232+
* incoming Posts. So reset to 'false' for this new epoch.
233+
*/
234+
sync->eager_send_active = false;
235+
230236
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
231237
"ompi_osc_pt2pt_start entering with group size %d...",
232238
sync->num_peers));

ompi/mca/osc/pt2pt/osc_pt2pt_comm.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2015 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2016 FUJITSU LIMITED. All rights reserved.
18+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -336,7 +337,16 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_
336337

337338
if (is_long_msg) {
338339
/* wait for eager sends to be active before starting a long put */
339-
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
340+
if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) {
341+
OPAL_THREAD_LOCK(&pt2pt_sync->lock);
342+
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
343+
while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) {
344+
opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock);
345+
}
346+
OPAL_THREAD_UNLOCK(&pt2pt_sync->lock);
347+
} else {
348+
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
349+
}
340350
}
341351

342352
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
@@ -495,7 +505,16 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
495505

496506
if (is_long_msg) {
497507
/* wait for synchronization before posting a long message */
498-
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
508+
if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) {
509+
OPAL_THREAD_LOCK(&pt2pt_sync->lock);
510+
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
511+
while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) {
512+
opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock);
513+
}
514+
OPAL_THREAD_UNLOCK(&pt2pt_sync->lock);
515+
} else {
516+
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
517+
}
499518
}
500519

501520
header = (ompi_osc_pt2pt_header_acc_t*) ptr;

ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* All rights reserved.
1111
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
1212
* reserved.
13-
* Copyright (c) 2010 IBM Corporation. All rights reserved.
13+
* Copyright (c) 2010-2016 IBM Corporation. All rights reserved.
1414
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
1515
* Copyright (c) 2015 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015-2016 Research Organization for Information Science
@@ -421,6 +421,16 @@ static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win)
421421
/* wait for unlock acks. this signals remote completion of fragments */
422422
ompi_osc_pt2pt_sync_wait_expected (lock);
423423

424+
/* It is possible for the unlock to finish too early before the data
425+
* is actually present in the recv buffer (for non-contiguous datatypes).
426+
* So make sure to wait for all of the fragments to arrive.
427+
*/
428+
OPAL_THREAD_LOCK(&module->lock);
429+
while (module->outgoing_frag_count < module->outgoing_frag_signal_count) {
430+
opal_condition_wait(&module->cond, &module->lock);
431+
}
432+
OPAL_THREAD_UNLOCK(&module->lock);
433+
424434
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
425435
"ompi_osc_pt2pt_unlock: unlock of %d complete", target));
426436
} else {

0 commit comments

Comments
 (0)