|
| 1 | +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ |
| 2 | +/* |
| 3 | + * Copyright (c) 2021 Google, Inc. All rights reserved. |
| 4 | + * $COPYRIGHT$ |
| 5 | + * |
| 6 | + * Additional copyrights may follow |
| 7 | + * |
| 8 | + * $HEADER$ |
| 9 | + */ |
| 10 | +#include "opal_config.h" |
| 11 | + |
| 12 | +#include "opal/mca/smsc/base/base.h" |
| 13 | +#include "opal/mca/smsc/xpmem/smsc_xpmem_internal.h" |
| 14 | +#include "opal/util/minmax.h" |
| 15 | + |
| 16 | +#include <fcntl.h> |
| 17 | +#include <stdio.h> |
| 18 | +#include <sys/prctl.h> |
| 19 | +#include <sys/stat.h> |
| 20 | +#include <sys/types.h> |
| 21 | +#include <unistd.h> |
| 22 | + |
| 23 | +static int mca_smsc_xpmem_component_register(void); |
| 24 | +static int mca_smsc_xpmem_component_open(void); |
| 25 | +static int mca_smsc_xpmem_component_close(void); |
| 26 | +static int mca_smsc_xpmem_component_query(void); |
| 27 | +static mca_smsc_module_t *mca_smsc_xpmem_component_enable(void); |
| 28 | + |
| 29 | +#define MCA_SMSC_XPMEM_DEFAULT_PRIORITY 42 |
| 30 | +static const int mca_smsc_xpmem_default_priority = MCA_SMSC_XPMEM_DEFAULT_PRIORITY; |
| 31 | + |
| 32 | +mca_smsc_xpmem_component_t mca_smsc_xpmem_component = { |
| 33 | + .super = { |
| 34 | + .smsc_version = { |
| 35 | + MCA_SMSC_DEFAULT_VERSION("xpmem"), |
| 36 | + .mca_open_component = mca_smsc_xpmem_component_open, |
| 37 | + .mca_close_component = mca_smsc_xpmem_component_close, |
| 38 | + .mca_register_component_params = mca_smsc_xpmem_component_register, |
| 39 | + }, |
| 40 | + .priority = MCA_SMSC_XPMEM_DEFAULT_PRIORITY, |
| 41 | + .query = mca_smsc_xpmem_component_query, |
| 42 | + .enable = mca_smsc_xpmem_component_enable, |
| 43 | + }, |
| 44 | +}; |
| 45 | + |
| 46 | +static int mca_smsc_xpmem_component_register(void) |
| 47 | +{ |
| 48 | + mca_smsc_xpmem_component.log_attach_align = 23; |
| 49 | + (void) mca_base_component_var_register(&mca_smsc_xpmem_component.super.smsc_version, |
| 50 | + "log_align", |
| 51 | + "Log base 2 of the alignment to use for xpmem " |
| 52 | + "segments (default: 23, minimum: 12, maximum: 25)", |
| 53 | + MCA_BASE_VAR_TYPE_INT, /*enumerator=*/NULL, /*bind=*/0, |
| 54 | + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, |
| 55 | + MCA_BASE_VAR_SCOPE_LOCAL, |
| 56 | + &mca_smsc_xpmem_component.log_attach_align); |
| 57 | + |
| 58 | + mca_smsc_xpmem_component.memcpy_chunk_size = 262144; |
| 59 | + (void) mca_base_component_var_register( |
| 60 | + &mca_smsc_xpmem_component.super.smsc_version, "memcpy_chunk_size", |
| 61 | + "Maximum size to copy with a single call to memcpy. On some systems a smaller or larger " |
| 62 | + "number may provide better performance (default: 256k)", |
| 63 | + MCA_BASE_VAR_TYPE_UINT64_T, /*enumerator=*/NULL, /*bind=*/0, MCA_BASE_VAR_FLAG_SETTABLE, |
| 64 | + OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_smsc_xpmem_component.memcpy_chunk_size); |
| 65 | + |
| 66 | + mca_smsc_base_register_default_params(&mca_smsc_xpmem_component.super, |
| 67 | + mca_smsc_xpmem_default_priority); |
| 68 | + return OPAL_SUCCESS; |
| 69 | +} |
| 70 | + |
| 71 | +static int mca_smsc_xpmem_component_open(void) |
| 72 | +{ |
| 73 | + /* nothing to do */ |
| 74 | + return OPAL_SUCCESS; |
| 75 | +} |
| 76 | + |
| 77 | +static int mca_smsc_xpmem_component_close(void) |
| 78 | +{ |
| 79 | + if (mca_smsc_xpmem_module.vma_module) { |
| 80 | + OBJ_RELEASE(mca_smsc_xpmem_module.vma_module); |
| 81 | + } |
| 82 | + |
| 83 | + return OPAL_SUCCESS; |
| 84 | +} |
| 85 | + |
| 86 | +static int mca_smsc_xpmem_send_modex(void) |
| 87 | +{ |
| 88 | + mca_smsc_xpmem_modex_t modex; |
| 89 | + |
| 90 | + modex.seg_id = mca_smsc_xpmem_component.my_seg_id; |
| 91 | + modex.address_max = mca_smsc_xpmem_component.my_address_max; |
| 92 | + |
| 93 | + int rc; |
| 94 | + OPAL_MODEX_SEND(rc, PMIX_LOCAL, &mca_smsc_xpmem_component.super.smsc_version, &modex, |
| 95 | + sizeof(modex)); |
| 96 | + return rc; |
| 97 | +} |
| 98 | + |
| 99 | +static int mca_smsc_xpmem_component_query(void) |
| 100 | +{ |
| 101 | + /* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need |
| 102 | + * to determine the value of TASK_SIZE. On x86_64 the value was hard-coded in sm to be |
| 103 | + * 0x7ffffffffffful but this approach does not work with AARCH64 (and possibly other |
| 104 | + * architectures). Since there is really no way to directly determine the value we can (in all |
| 105 | + * cases?) look through the mapping for this process to determine what the largest address is. |
| 106 | + * This should be the top of the stack. No heap allocations should be larger than this value. |
| 107 | + * Since the largest address may differ between processes the value must be shared as part of |
| 108 | + * the modex and stored in the endpoint. */ |
| 109 | + FILE *fh = fopen("/proc/self/maps", "r"); |
| 110 | + if (NULL == fh) { |
| 111 | + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, |
| 112 | + "mca_smsc_xpmem_component_query: could not open /proc/self/maps for " |
| 113 | + "reading. disabling XPMEM"); |
| 114 | + return OPAL_ERR_NOT_AVAILABLE; |
| 115 | + } |
| 116 | + |
| 117 | + char buffer[1024]; |
| 118 | + uintptr_t address_max = 0; |
| 119 | + while (fgets(buffer, sizeof(buffer), fh)) { |
| 120 | + uintptr_t low, high; |
| 121 | + char *tmp; |
| 122 | + /* each line of /proc/self/maps starts with low-high in hexidecimal (without a 0x) */ |
| 123 | + low = strtoul(buffer, &tmp, 16); |
| 124 | + high = strtoul(tmp + 1, NULL, 16); |
| 125 | + if (address_max < high) { |
| 126 | + address_max = high; |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + fclose(fh); |
| 131 | + |
| 132 | + if (0 == address_max) { |
| 133 | + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output, |
| 134 | + "mca_smsc_xpmem_component_query: could not determine the address max"); |
| 135 | + return OPAL_ERR_NOT_AVAILABLE; |
| 136 | + } |
| 137 | + |
| 138 | + /* save the calcuated maximum */ |
| 139 | + mca_smsc_xpmem_component.my_address_max = address_max - 1; |
| 140 | + |
| 141 | + /* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1 even though |
| 142 | + * it is not safe for attach */ |
| 143 | + mca_smsc_xpmem_component.my_seg_id = xpmem_make(0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE, |
| 144 | + (void *) 0666); |
| 145 | + if (-1 == mca_smsc_xpmem_component.my_seg_id) { |
| 146 | + return OPAL_ERR_NOT_AVAILABLE; |
| 147 | + } |
| 148 | + |
| 149 | + mca_smsc_xpmem_send_modex(); |
| 150 | + |
| 151 | + return OPAL_SUCCESS; |
| 152 | +} |
| 153 | + |
| 154 | +static mca_smsc_module_t *mca_smsc_xpmem_component_enable(void) |
| 155 | +{ |
| 156 | + if (0 > mca_smsc_xpmem_component.super.priority) { |
| 157 | + return NULL; |
| 158 | + } |
| 159 | + |
| 160 | + /* limit segment alignment to be between 4k and 16M */ |
| 161 | + mca_smsc_xpmem_component.log_attach_align |
| 162 | + = opal_min(opal_max(mca_smsc_xpmem_component.log_attach_align, 12), 25); |
| 163 | + |
| 164 | + mca_smsc_xpmem_module.vma_module = mca_rcache_base_vma_module_alloc(); |
| 165 | + |
| 166 | + return &mca_smsc_xpmem_module.super; |
| 167 | +} |
0 commit comments