Skip to content

Fix the leak of fragments for persistent sends (issue #6565) #6621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ompi/mca/pml/ob1/pml_ob1_rdmafrag.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -46,7 +46,8 @@ struct mca_pml_ob1_rdma_frag_t {
mca_bml_base_btl_t *rdma_bml;
mca_pml_ob1_hdr_t rdma_hdr;
mca_pml_ob1_rdma_state_t rdma_state;
size_t rdma_length;
size_t rdma_length; /* how much the fragment will transfer */
opal_atomic_size_t rdma_bytes_remaining; /* how much is left to be transferred */
void *rdma_req;
uint32_t retries;
mca_pml_ob1_rdma_frag_callback_t cbfunc;
Expand All @@ -71,7 +72,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_rdma_frag_t);

#define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \
do { \
/* return fragment */ \
if (frag->local_handle) { \
mca_bml_base_deregister_mem (frag->rdma_bml, frag->local_handle); \
frag->local_handle = NULL; \
Expand Down
4 changes: 0 additions & 4 deletions ompi/mca/pml/ob1/pml_ob1_recvfrag.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,10 +577,6 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
* then throttle sends */
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) {
if (NULL != sendreq->rdma_frag) {
if (NULL != sendreq->rdma_frag->local_handle) {
mca_bml_base_deregister_mem (sendreq->req_rdma[0].bml_btl, sendreq->rdma_frag->local_handle);
sendreq->rdma_frag->local_handle = NULL;
}
MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag);
sendreq->rdma_frag = NULL;
}
Expand Down
10 changes: 7 additions & 3 deletions ompi/mca/pml/ob1/pml_ob1_recvreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2018 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -319,7 +319,12 @@ static int mca_pml_ob1_recv_request_ack(
return OMPI_SUCCESS;
}

/* let know to shedule function there is no need to put ACK flag */
/* let know to shedule function there is no need to put ACK flag. If not all message went over
* RDMA then we cancel the GET protocol in order to switch back to send/recv. In this case send
* back the remote send request, the peer kept a poointer to the frag locally. In the future we
* might want to cancel the fragment itself, in which case we will have to send back the remote
* fragment instead of the remote request.
*/
recvreq->req_ack_sent = true;
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
recvreq, recvreq->req_send_offset, 0,
Expand Down Expand Up @@ -658,7 +663,6 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
int rc;

prev_sent = offset = 0;
bytes_remaining = hdr->hdr_rndv.hdr_msg_length;
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
recvreq->req_send_offset = 0;
recvreq->req_rdma_offset = 0;
Expand Down
19 changes: 12 additions & 7 deletions ompi/mca/pml/ob1/pml_ob1_sendreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2018 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -41,7 +41,6 @@
#include "ompi/mca/bml/base/base.h"
#include "ompi/memchecker.h"


OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t,
NULL, NULL);

Expand Down Expand Up @@ -148,10 +147,7 @@ static void mca_pml_ob1_send_request_destruct(mca_pml_ob1_send_request_t* req)
{
OBJ_DESTRUCT(&req->req_send_ranges);
OBJ_DESTRUCT(&req->req_send_range_lock);
if (req->rdma_frag) {
MCA_PML_OB1_RDMA_FRAG_RETURN(req->rdma_frag);
req->rdma_frag = NULL;
}
assert( NULL == req->rdma_frag );
}

OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t,
Expand Down Expand Up @@ -262,12 +258,20 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
{
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
size_t frag_remaining;

/* count bytes of user data actually delivered and check for request completion */
if (OPAL_LIKELY(0 < rdma_length)) {
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
frag_remaining = OPAL_THREAD_SUB_FETCH_SIZE_T(&frag->rdma_bytes_remaining, (size_t)rdma_length);
SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)rdma_length,
OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI);

if( 0 == frag_remaining ) { /* this frag is now completed. Update the request and be done */
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
if( sendreq->rdma_frag == frag )
sendreq->rdma_frag = NULL;
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
}
}

send_request_pml_complete_check(sendreq);
Expand Down Expand Up @@ -701,6 +705,7 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
frag->rdma_req = sendreq;
frag->rdma_bml = bml_btl;
frag->rdma_length = size;
frag->rdma_bytes_remaining = size;
frag->cbfunc = mca_pml_ob1_rget_completion;
/* do not store the local handle in the fragment. it will be released by mca_pml_ob1_free_rdma_resources */

Expand Down
5 changes: 1 addition & 4 deletions ompi/mca/pml/ob1/pml_ob1_sendreq.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,7 @@ static inline void mca_pml_ob1_send_request_fini (mca_pml_ob1_send_request_t *se
{
/* Let the base handle the reference counts */
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send));
if (sendreq->rdma_frag) {
MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag);
sendreq->rdma_frag = NULL;
}
assert( NULL == sendreq->rdma_frag );
}

/*
Expand Down