|
2 | 2 | /*
|
3 | 3 | * Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights
|
4 | 4 | * reserved.
|
| 5 | + * Copyright (c) 2018 Research Organization for Information Science |
| 6 | + * and Technology (RIST). All rights reserved. |
5 | 7 | * $COPYRIGHT$
|
6 | 8 | *
|
7 | 9 | * Additional copyrights may follow
|
|
23 | 25 | #include "opal/sys/cma.h"
|
24 | 26 | #endif /* OPAL_CMA_NEED_SYSCALL_DEFS */
|
25 | 27 |
|
| 28 | + |
26 | 29 | #endif
|
27 | 30 |
|
28 | 31 | /**
|
@@ -71,11 +74,34 @@ int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *
|
71 | 74 | struct iovec dst_iov = {.iov_base = local_address, .iov_len = size};
|
72 | 75 | ssize_t ret;
|
73 | 76 |
|
74 |
| - ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0); |
75 |
| - if (ret != (ssize_t)size) { |
76 |
| - opal_output(0, "Read %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno); |
77 |
| - return OPAL_ERROR; |
78 |
| - } |
| 77 | + /* |
| 78 | + * According to the man page : |
| 79 | + * "On success, process_vm_readv() returns the number of bytes read and |
| 80 | + * process_vm_writev() returns the number of bytes written. This return |
| 81 | + * value may be less than the total number of requested bytes, if a |
| 82 | + * partial read/write occurred. (Partial transfers apply at the |
| 83 | + * granularity of iovec elements. These system calls won't perform a |
| 84 | + * partial transfer that splits a single iovec element.)". |
| 85 | + * So since we use a single iovec element, the returned size should either |
| 86 | + * be 0 or size, and the do loop should not be needed here. |
| 87 | + * We tried on various Linux kernels with size > 2 GB, and surprisingly, |
| 88 | + * the returned value is always 0x7ffff000 (fwiw, it happens to be the size |
| 89 | + * of the largest number of pages that fits in a signed 32-bit integer). |
| 90 | + * We do not know whether this is a bug from the kernel, the libc or even |
| 91 | + * the man page, but for the time being, we do as if process_vm_readv() could |
| 92 | + * return any value. |
| 93 | + */ |
| 94 | + do { |
| 95 | + ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0); |
| 96 | + if (0 > ret) { |
| 97 | + opal_output(0, "Read %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno); |
| 98 | + return OPAL_ERROR; |
| 99 | + } |
| 100 | + src_iov.iov_base = (void *)((char *)src_iov.iov_base + ret); |
| 101 | + src_iov.iov_len -= ret; |
| 102 | + dst_iov.iov_base = (void *)((char *)dst_iov.iov_base + ret); |
| 103 | + dst_iov.iov_len -= ret; |
| 104 | + } while (0 < src_iov.iov_len); |
79 | 105 |
|
80 | 106 | /* always call the callback function */
|
81 | 107 | cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
|
|
0 commit comments