Skip to content

Commit f15d620

Browse files
Merge pull request #4832 from ggouaillardet/topic/vader_process_vm
btl/vader: handle unexpected short read/write in process_vm_{read,write}v
2 parents 5ed2fc2 + 9fedf28 commit f15d620

File tree

2 files changed

+45
-12
lines changed

2 files changed

+45
-12
lines changed

opal/mca/btl/vader/btl_vader_get.c

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
/*
33
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights
44
* reserved.
5+
* Copyright (c) 2018 Research Organization for Information Science
6+
* and Technology (RIST). All rights reserved.
57
* $COPYRIGHT$
68
*
79
* Additional copyrights may follow
@@ -23,6 +25,7 @@
2325
#include "opal/sys/cma.h"
2426
#endif /* OPAL_CMA_NEED_SYSCALL_DEFS */
2527

28+
2629
#endif
2730

2831
/**
@@ -71,11 +74,34 @@ int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *
7174
struct iovec dst_iov = {.iov_base = local_address, .iov_len = size};
7275
ssize_t ret;
7376

74-
ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0);
75-
if (ret != (ssize_t)size) {
76-
opal_output(0, "Read %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno);
77-
return OPAL_ERROR;
78-
}
77+
/*
78+
* According to the man page:
79+
* "On success, process_vm_readv() returns the number of bytes read and
80+
* process_vm_writev() returns the number of bytes written. This return
81+
* value may be less than the total number of requested bytes, if a
82+
* partial read/write occurred. (Partial transfers apply at the
83+
* granularity of iovec elements. These system calls won't perform a
84+
* partial transfer that splits a single iovec element.)".
85+
* So since we use a single iovec element, the returned size should either
86+
* be 0 or size, and the do loop should not be needed here.
87+
* We tried on various Linux kernels with size > 2 GB, and surprisingly,
88+
* the returned value is always 0x7ffff000 (fwiw, it happens to be the size
89+
* of the largest number of pages that fits in a signed 32-bit integer).
90+
* We do not know whether this is a bug from the kernel, the libc or even
91+
* the man page, but for the time being, we do as if process_vm_readv() could
92+
* return any value.
93+
*/
94+
do {
95+
ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0);
96+
if (0 > ret) {
97+
opal_output(0, "Read %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno);
98+
return OPAL_ERROR;
99+
}
100+
src_iov.iov_base = (void *)((char *)src_iov.iov_base + ret);
101+
src_iov.iov_len -= ret;
102+
dst_iov.iov_base = (void *)((char *)dst_iov.iov_base + ret);
103+
dst_iov.iov_len -= ret;
104+
} while (0 < src_iov.iov_len);
79105

80106
/* always call the callback function */
81107
cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);

opal/mca/btl/vader/btl_vader_put.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
/*
33
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights
44
* reserved.
5-
* Copyright (c) 2014 Research Organization for Information Science
6-
* and Technology (RIST). All rights reserved.
5+
* Copyright (c) 2014-2018 Research Organization for Information Science
6+
* and Technology (RIST). All rights reserved.
77
* $COPYRIGHT$
88
*
99
* Additional copyrights may follow
@@ -69,11 +69,18 @@ int mca_btl_vader_put_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *
6969
struct iovec dst_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
7070
ssize_t ret;
7171

72-
ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0);
73-
if (ret != (ssize_t)size) {
74-
opal_output(0, "Wrote %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno);
75-
return OPAL_ERROR;
76-
}
72+
/* This should not be needed, see the rationale in mca_btl_vader_get_cma() */
73+
do {
74+
ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0);
75+
if (0 > ret) {
76+
opal_output(0, "Wrote %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno);
77+
return OPAL_ERROR;
78+
}
79+
src_iov.iov_base = (void *)((char *)src_iov.iov_base + ret);
80+
src_iov.iov_len -= ret;
81+
dst_iov.iov_base = (void *)((char *)dst_iov.iov_base + ret);
82+
dst_iov.iov_len -= ret;
83+
} while (0 < src_iov.iov_len);
7784

7885
/* always call the callback function */
7986
cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);

0 commit comments

Comments
 (0)