|
| 1 | +/* |
| 2 | + * Copyright (c) 2023 NVIDIA Corporation. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * $COPYRIGHT$ |
| 6 | + * |
| 7 | + * Additional copyrights may follow |
| 8 | + * |
| 9 | + * $HEADER$ |
| 10 | + */ |
| 11 | + |
| 12 | +#include "oshmem_config.h" |
| 13 | + |
| 14 | +#include "oshmem/constants.h" |
| 15 | +#include "oshmem/include/shmem.h" |
| 16 | +#include "oshmem/runtime/params.h" |
| 17 | +#include "oshmem/runtime/runtime.h" |
| 18 | +#include <stdlib.h> |
| 19 | +#include <memory.h> |
| 20 | + |
| 21 | +#include "oshmem/shmem/shmem_api_logger.h" |
| 22 | +#include "oshmem/shmem/shmem_lock.h" |
| 23 | +#include "oshmem/mca/memheap/memheap.h" |
| 24 | +#include "oshmem/mca/memheap/base/base.h" |
| 25 | +#include "oshmem/mca/atomic/atomic.h" |
| 26 | + |
| 27 | +#define OPAL_BITWISE_SIZEOF_LONG (SIZEOF_LONG * 8) |
| 28 | + |
| 29 | + |
/**
 * State for the basic MCS (Mellor-Crummey/Scott) distributed lock
 * algorithm.  This struct overlays the caller-supplied symmetric
 * `long` lock object (see the casts in _shmem_mcs_set_lock et al.),
 * so it must fit in sizeof(long) — NOTE(review): assumes 64-bit long;
 * confirm for 32-bit targets.
 */
struct shmem_mcs_lock {
    /** Queue tail: has meaning only on the MCSQ_TAIL OWNER PE
     *  (the PE computed by SHMEM_MCSL_TAIL_OWNER for this address). */
    int tail;
    /** Has meaning on all PEs.
     *  Combination of the successor PE ID (low 31 bits) and the
     *  wait-signal flag (SHMEM_MCSL_SIGNAL_MASK, the top bit). */
    int next;
};
typedef struct shmem_mcs_lock shmem_mcs_lock_t;
| 39 | + |
| 40 | +#define SHMEM_MCSL_TAIL_OWNER(lock_ptr)\ |
| 41 | + (((uintptr_t)(lock_ptr) / sizeof(long)) % shmem_n_pes()) |
| 42 | + |
| 43 | +#define SHMEM_MCSL_NEXT_MASK 0x7FFFFFFFU |
| 44 | +#define SHMEM_MCSL_SIGNAL_MASK 0x80000000U /** Wait signal mask */ |
| 45 | +#define SHMEM_MCSL_NEXT(lock_val) ((lock_val) & SHMEM_MCSL_NEXT_MASK) |
| 46 | +/** Improve readability */ |
| 47 | +#define SHMEM_MCSL_GET_PE(tail_val) ((tail_val) & SHMEM_MCSL_NEXT_MASK) |
| 48 | +#define SHMEM_MCSL_SIGNAL(lock_val) ((lock_val) & SHMEM_MCSL_SIGNAL_MASK) |
| 49 | +#define SHMEM_MCSL_SET_SIGNAL(lock_val) ((lock_val) | SHMEM_MCSL_SIGNAL_MASK) |
| 50 | + |
/**
 * Acquire a lock using the distributed MCS queue-lock protocol.
 *
 * The lock object is interpreted as shmem_mcs_lock_t {tail, next},
 * symmetric on all PEs.  The authoritative queue tail lives on the PE
 * selected by SHMEM_MCSL_TAIL_OWNER(lock); each PE's `next` field holds
 * its successor's PE id plus a wait-signal bit (SHMEM_MCSL_SIGNAL_MASK).
 *
 * Steps as implemented below:
 *   1. reset this PE's `next` to SHMEM_MCSL_NEXT_MASK ("no successor"),
 *   2. atomically swap (my_pe | SIGNAL) into the tail on the tail owner,
 *   3. if the previous tail had its signal bit set, another PE holds or
 *      is queued for the lock: link this PE into the predecessor's
 *      `next`, raise the signal bit on our own `next`, and spin until
 *      the predecessor clears that bit on release.
 *
 * @param lockp  symmetric lock object (caller of shmem_set_lock)
 */
void
_shmem_mcs_set_lock(long *lockp)
{
    shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
    /* PE that owns the authoritative queue tail for this lock address */
    int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
    int new_tail_req = 0;
    int *tail = &(lock->tail);
    int *next = &(lock->next);
    int my_pe = shmem_my_pe();
    int curr = 0;
    int out_value = 0;
    int prev_tail = 0;
    int prev_tailpe = 0;
    int tval = 0;       /* zero addend: fadd(..., tval) acts as atomic fetch */
    int tmp_val = 0;
    int retv = 0;
    uint64_t value_tmp = 0;

    RUNTIME_CHECK_INIT();
    /**
     * Initializing next pointer to next mask ("no successor").
     * Done atomically to avoid races as the NEXT pointer
     * can be modified by other PEs while acquiring or
     * releasing it.
     */
    /**
     * Can make this be shmem_atomic_set to be safe
     * in non-cache-coherent architectures;
     * has an impact on performance.
     */
    value_tmp = SHMEM_MCSL_NEXT_MASK;
    out_value = SHMEM_MCSL_NEXT_MASK;
    retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void*)next,
                                (void*)&out_value, value_tmp,
                                sizeof(int), my_pe));
    RUNTIME_CHECK_RC(retv);
    /* Ensure the reset of `next` is remotely complete before enqueueing. */
    MCA_SPML_CALL(quiet(oshmem_ctx_default));

    /** Signal for setting lock */
    new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);
    /**
     * Swap and make me the new tail, updated on the tail owner.
     * Get the previous tail PE back in prev_tail.
     */
    retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void *)tail,
                                (void*)&prev_tail,
                                OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
                                                        sizeof(new_tail_req)),
                                sizeof(int), mcs_tail_owner));
    RUNTIME_CHECK_RC(retv);

    prev_tailpe = SHMEM_MCSL_GET_PE(prev_tail);
    if (SHMEM_MCSL_SIGNAL(prev_tail)) {
        /**
         * Someone else has got the lock before this PE.
         * Add this PE to the previous tail PE's next pointer:
         * since that `next` was initialized to SHMEM_MCSL_NEXT_MASK,
         * adding (my_pe - SHMEM_MCSL_NEXT_MASK) leaves it holding my_pe.
         * Subtract the SIGNAL bit's worth to avoid changing it.
         */
        tmp_val = my_pe - SHMEM_MCSL_NEXT_MASK;
        retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void*)next, tmp_val,
                                   sizeof(int), prev_tailpe));
        RUNTIME_CHECK_RC(retv);
        /**
         * Raise the wait-signal bit on our own `next`; it is cleared
         * eventually by the predecessor when its lock is released.
         * Must be done atomically to avoid races where the
         * next pointer is modified by another PE acquiring or
         * releasing this lock.
         */
        retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void *)next,
                                   SHMEM_MCSL_SIGNAL_MASK, sizeof(int),
                                   my_pe));
        RUNTIME_CHECK_RC(retv);
        MCA_SPML_CALL(quiet(oshmem_ctx_default));
        /** Wait for predecessor's release to flip this PE's signal to false.
         *  fadd with a zero addend (tval == 0) is used as an atomic fetch. */
        retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
                                    (void*)&curr, tval, sizeof(int), my_pe));
        RUNTIME_CHECK_RC(retv);

        while (SHMEM_MCSL_SIGNAL(curr)) {
            /* Block until `next` changes from the last value we saw,
             * then atomically re-fetch and re-test the signal bit. */
            retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE,
                                      (void*)&curr, SHMEM_INT));
            RUNTIME_CHECK_RC(retv);
            retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
                                        (void*)&curr, tval, sizeof(int),
                                        my_pe));
            RUNTIME_CHECK_RC(retv);
        }
    }
/** else.. this PE has got the lock as no one else had it */
}
| 142 | + |
/**
 * Release a lock acquired via the distributed MCS queue-lock protocol.
 *
 * Reads this PE's `next` field: if it is still SHMEM_MCSL_NEXT_MASK no
 * successor has linked itself yet, so try to compare-and-swap the tail
 * (on the tail-owner PE) from "me" back to 0 — if that succeeds the
 * queue is empty and the lock is free.  If the cswap fails, a successor
 * is racing to enqueue: wait for it to write itself into our `next`,
 * then clear its wait-signal bit to hand the lock over.
 *
 * @param lockp  symmetric lock object (caller of shmem_clear_lock)
 */
void
_shmem_mcs_clear_lock(long *lockp)
{
    shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
    /* PE that owns the authoritative queue tail for this lock address */
    int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
    int *tail = &(lock->tail);
    int *next = &(lock->next);
    int my_pe = shmem_my_pe();
    int next_value = 0;
    int swap_cond = 0;
    int prev_value = 0;
    int tval = 0;       /* zero addend: fadd(..., tval) acts as atomic fetch */
    int val_tmp = 0;    /* 0 == new tail value meaning "lock free" */
    int nmask = 0;
    int a_val = 0;
    int retv = 0;

    /**
     * Can make this an atomic fetch to be safe in non-cache-coherent
     * architectures; has an impact on performance.
     */
    retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
                                (void*)&next_value, tval, sizeof(int),
                                my_pe));
    RUNTIME_CHECK_RC(retv);
    MCA_SPML_CALL(quiet(oshmem_ctx_default));

    if (next_value == SHMEM_MCSL_NEXT_MASK) {
        /* No successor linked yet: if the tail still says "me (signaled)",
         * swap it back to 0 (val_tmp) to mark the lock free. */
        swap_cond = SHMEM_MCSL_SET_SIGNAL(my_pe);
        retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
                                     (void *)tail, (uint64_t *)&(prev_value),
                                     OSHMEM_ATOMIC_PTR_2_INT(&swap_cond,
                                                             sizeof(swap_cond)),
                                     OSHMEM_ATOMIC_PTR_2_INT(&val_tmp,
                                                             sizeof(val_tmp)), sizeof(int),
                                     mcs_tail_owner));
        RUNTIME_CHECK_RC(retv);

        /** I am the tail.. and lock is released */
        if (prev_value == swap_cond) {
            return;
        }
        /**
         * I am not the tail: another PE may be racing to acquire the lock;
         * let it complete setting itself as our next.
         */
        nmask = SHMEM_MCSL_NEXT_MASK;
        while(next_value == nmask) {
            /* Block until `next` differs from NEXT_MASK, then re-fetch. */
            retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE,
                                      (void*)&nmask, SHMEM_INT));
            RUNTIME_CHECK_RC(retv);
            retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
                                        (void*)&next_value, tval,
                                        sizeof(int), my_pe));
            RUNTIME_CHECK_RC(retv);
        }
    }
    /** There is a successor: release the lock to the successor by adding
     *  SIGNAL_MASK to its `next`, whose signal bit is already set —
     *  NOTE(review): this relies on 32-bit wraparound of the remote
     *  atomic add to flip the signal bit off; confirm the atomic
     *  component guarantees wrapping semantics for int targets. */
    a_val = SHMEM_MCSL_SIGNAL_MASK;
    retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default,
                               (void *)next, a_val, sizeof(a_val),
                               SHMEM_MCSL_NEXT(next_value)));
    RUNTIME_CHECK_RC(retv);
    MCA_SPML_CALL(quiet(oshmem_ctx_default));
}
| 208 | + |
| 209 | +int |
| 210 | +_shmem_mcs_test_lock(long *lockp) |
| 211 | +{ |
| 212 | + shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp; |
| 213 | + int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock); |
| 214 | + int new_tail_req = 0; |
| 215 | + int prev_tail = 0; |
| 216 | + int tmp_cond = 0; |
| 217 | + int *tail = &(lock->tail); |
| 218 | + int *next = &(lock->next); |
| 219 | + int my_pe = shmem_my_pe(); |
| 220 | + int retv = 0; |
| 221 | + |
| 222 | + /** Initializing next pointer to next mask */ |
| 223 | + *next = SHMEM_MCSL_NEXT_MASK; |
| 224 | + |
| 225 | + /** Signal for setting lock */ |
| 226 | + new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe); |
| 227 | + |
| 228 | + /** Check if previously cleared before swapping */ |
| 229 | + retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default, |
| 230 | + (void *)tail, (uint64_t *)&(prev_tail), |
| 231 | + OSHMEM_ATOMIC_PTR_2_INT(&tmp_cond, |
| 232 | + sizeof(tmp_cond)), |
| 233 | + OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req, |
| 234 | + sizeof(new_tail_req)), |
| 235 | + sizeof(int), mcs_tail_owner)); |
| 236 | + RUNTIME_CHECK_RC(retv); |
| 237 | + |
| 238 | + return (0 != prev_tail); |
| 239 | +} |
0 commit comments