Skip to content

Commit 67026ef

Browse files
authored
Merge pull request #11796 from vvenkates27/upstream_main
SHMEM_LOCKS: MCS implementation of SHMEM LOCKS
2 parents e0ec55f + 1396585 commit 67026ef

File tree

8 files changed

+298
-7
lines changed

8 files changed

+298
-7
lines changed

oshmem/runtime/oshmem_shmem_params.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717
#include "oshmem/constants.h"
1818

1919

20-
int oshmem_shmem_lock_recursive = 0;
21-
int oshmem_shmem_api_verbose = 0;
22-
int oshmem_preconnect_all = 0;
20+
int oshmem_shmem_lock_recursive = 0;
21+
int oshmem_shmem_api_verbose = 0;
22+
int oshmem_shmem_enable_mcs_locks = 1;
23+
int oshmem_preconnect_all = 0;
2324

2425
int oshmem_shmem_register_params(void)
2526
{
@@ -38,6 +39,19 @@ int oshmem_shmem_register_params(void)
3839
MCA_BASE_VAR_SCOPE_READONLY,
3940
&oshmem_shmem_lock_recursive);
4041

42+
(void) mca_base_var_register("oshmem",
43+
"oshmem",
44+
NULL,
45+
"enable_mcs_lock",
46+
"enable mcs locks",
47+
MCA_BASE_VAR_TYPE_INT,
48+
NULL,
49+
1,
50+
MCA_BASE_VAR_FLAG_SETTABLE,
51+
OPAL_INFO_LVL_9,
52+
MCA_BASE_VAR_SCOPE_READONLY,
53+
&oshmem_shmem_enable_mcs_locks);
54+
4155
(void) mca_base_var_register("oshmem",
4256
"oshmem",
4357
NULL,

oshmem/runtime/params.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ OSHMEM_DECLSPEC extern int oshmem_shmem_api_verbose;
3737
*/
3838
OSHMEM_DECLSPEC extern int oshmem_preconnect_all;
3939

40+
41+
/**
42+
* Whether to force SHMEM processes to use MCS locking
43+
* for shmem_locks
44+
*/
45+
OSHMEM_DECLSPEC extern int oshmem_shmem_enable_mcs_locks;
46+
4047
END_C_DECLS
4148

4249
#endif /* OSHMEM_RUNTIME_PARAMS_H */

oshmem/shmem/c/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313

1414

1515
OSHMEM_AUX_SOURCES = \
16-
shmem_lock.c
16+
shmem_lock.c \
17+
shmem_mcs_lock.c
1718

1819
OSHMEM_API_SOURCES = \
1920
shmem_init.c \

oshmem/shmem/c/shmem_clear_lock.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
/*
2+
* Copyright (c) 2023 NVIDIA Corporation.
3+
* All rights reserved.
24
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
35
* All rights reserved.
46
* Copyright (c) 2019 Research Organization for Information Science
@@ -18,6 +20,7 @@
1820
#include "oshmem/shmem/shmem_api_logger.h"
1921
#include "oshmem/runtime/runtime.h"
2022
#include "oshmem/shmem/shmem_lock.h"
23+
#include "oshmem/runtime/params.h"
2124

2225
#if OSHMEM_PROFILING
2326
#include "oshmem/include/pshmem.h"
@@ -27,5 +30,11 @@
2730

2831
void shmem_clear_lock(volatile long *lock)
2932
{
30-
_shmem_clear_lock((void *)lock, sizeof(long));
33+
if (oshmem_shmem_enable_mcs_locks) {
34+
SHMEM_API_VERBOSE(10, "Clear Lock with MCS Lock implementation");
35+
_shmem_mcs_clear_lock((long *)lock);
36+
} else {
37+
SHMEM_API_VERBOSE(10, "Clear Lock with Ticket Lock implementation");
38+
_shmem_clear_lock((void *)lock, sizeof(long));
39+
}
3140
}

oshmem/shmem/c/shmem_mcs_lock.c

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
/*
2+
* Copyright (c) 2023 NVIDIA Corporation.
3+
* All rights reserved.
4+
*
5+
* $COPYRIGHT$
6+
*
7+
* Additional copyrights may follow
8+
*
9+
* $HEADER$
10+
*/
11+
12+
#include "oshmem_config.h"
13+
14+
#include "oshmem/constants.h"
15+
#include "oshmem/include/shmem.h"
16+
#include "oshmem/runtime/params.h"
17+
#include "oshmem/runtime/runtime.h"
18+
#include <stdlib.h>
19+
#include <memory.h>
20+
21+
#include "oshmem/shmem/shmem_api_logger.h"
22+
#include "oshmem/shmem/shmem_lock.h"
23+
#include "oshmem/mca/memheap/memheap.h"
24+
#include "oshmem/mca/memheap/base/base.h"
25+
#include "oshmem/mca/atomic/atomic.h"
26+
27+
#define OPAL_BITWISE_SIZEOF_LONG (SIZEOF_LONG * 8)
28+
29+
30+
/**
 * Lock-word layout used by the basic MCS distributed queue lock.
 * The _shmem_mcs_*_lock routines obtain it by casting the caller's
 * long pointer to this type.
 * NOTE(review): the overlay presumably needs
 * sizeof(long) >= 2 * sizeof(int) -- confirm for every supported ABI.
 */
31+
struct shmem_mcs_lock {
32+
/** Queue tail; only meaningful on the PE selected by SHMEM_MCSL_TAIL_OWNER */
33+
int tail;
34+
/** Meaningful on every PE */
35+
/** Successor PE id (bits under SHMEM_MCSL_NEXT_MASK) combined with the wait signal (SHMEM_MCSL_SIGNAL_MASK bit) */
36+
int next;
37+
};
38+
typedef struct shmem_mcs_lock shmem_mcs_lock_t;
39+
40+
/** PE holding the authoritative tail word: (lock address / sizeof(long)) modulo the number of PEs */
#define SHMEM_MCSL_TAIL_OWNER(lock_ptr)\
41+
(((uintptr_t)(lock_ptr) / sizeof(long)) % shmem_n_pes())
42+
43+
#define SHMEM_MCSL_NEXT_MASK 0x7FFFFFFFU /** Low 31 bits: PE id field; the all-ones value is also what `next` is reset to */
44+
#define SHMEM_MCSL_SIGNAL_MASK 0x80000000U /** Wait signal mask (top bit of the 32-bit word) */
45+
#define SHMEM_MCSL_NEXT(lock_val) ((lock_val) & SHMEM_MCSL_NEXT_MASK) /** Extract the PE id field from a `next` value */
46+
/** Same extraction as SHMEM_MCSL_NEXT, named separately to improve readability when applied to a tail value */
47+
#define SHMEM_MCSL_GET_PE(tail_val) ((tail_val) & SHMEM_MCSL_NEXT_MASK)
48+
#define SHMEM_MCSL_SIGNAL(lock_val) ((lock_val) & SHMEM_MCSL_SIGNAL_MASK) /** Non-zero when the signal bit is set */
49+
#define SHMEM_MCSL_SET_SIGNAL(lock_val) ((lock_val) | SHMEM_MCSL_SIGNAL_MASK) /** Value with the signal bit forced on */
50+
51+
void
52+
_shmem_mcs_set_lock(long *lockp)
53+
{
54+
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
55+
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
56+
int new_tail_req = 0;
57+
int *tail = &(lock->tail);
58+
int *next = &(lock->next);
59+
int my_pe = shmem_my_pe();
60+
int curr = 0;
61+
int out_value = 0;
62+
int prev_tail = 0;
63+
int prev_tailpe = 0;
64+
int tval = 0;
65+
int tmp_val = 0;
66+
int retv = 0;
67+
uint64_t value_tmp = 0;
68+
69+
RUNTIME_CHECK_INIT();
70+
/**
71+
* Initializing next pointer to next mask
72+
* Done atomically to avoid races as NEXT pointer
73+
* can be modified by other PEs while acquiring or
74+
* releasing it.
75+
*/
76+
/**
77+
* Can make this to be shmem_atomic_set to be safe
78+
* in non-cc architectures
79+
* has an impact on performance
80+
*/
81+
value_tmp = SHMEM_MCSL_NEXT_MASK;
82+
out_value = SHMEM_MCSL_NEXT_MASK;
83+
retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void*)next,
84+
(void*)&out_value, value_tmp,
85+
sizeof(int), my_pe));
86+
RUNTIME_CHECK_RC(retv);
87+
MCA_SPML_CALL(quiet(oshmem_ctx_default));
88+
89+
/** Signal for setting lock */
90+
new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);
91+
/**
92+
* Swap and make me the new tail and update in tail owner
93+
* Get the previous tail PE.
94+
*/
95+
retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void *)tail,
96+
(void*)&prev_tail,
97+
OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
98+
sizeof(new_tail_req)),
99+
sizeof(int), mcs_tail_owner));
100+
RUNTIME_CHECK_RC(retv);
101+
102+
prev_tailpe = SHMEM_MCSL_GET_PE(prev_tail);
103+
if (SHMEM_MCSL_SIGNAL(prev_tail)) {
104+
/**
105+
* Someone else has got the lock before this PE
106+
* Adding this PE to the previous tail PE's Next pointer
107+
* Substract the SIGNAL Bit to avoid changing it.
108+
*/
109+
tmp_val = my_pe - SHMEM_MCSL_NEXT_MASK;
110+
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void*)next, tmp_val,
111+
sizeof(int), prev_tailpe));
112+
RUNTIME_CHECK_RC(retv);
113+
/**
114+
* This value to be changed eventually by predecessor
115+
* when its lock is released.
116+
* Need to be done atomically to avoid any races where
117+
* next pointer is modified by another PE acquiring or
118+
* releasing this.
119+
*/
120+
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void *)next,
121+
SHMEM_MCSL_SIGNAL_MASK, sizeof(int),
122+
my_pe));
123+
RUNTIME_CHECK_RC(retv);
124+
MCA_SPML_CALL(quiet(oshmem_ctx_default));
125+
/** Wait for predecessor release lock to this PE signal to false. */
126+
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
127+
(void*)&curr, tval, sizeof(int), my_pe));
128+
RUNTIME_CHECK_RC(retv);
129+
130+
while (SHMEM_MCSL_SIGNAL(curr)) {
131+
retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE,
132+
(void*)&curr, SHMEM_INT));
133+
RUNTIME_CHECK_RC(retv);
134+
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
135+
(void*)&curr, tval, sizeof(int),
136+
my_pe));
137+
RUNTIME_CHECK_RC(retv);
138+
}
139+
}
140+
/** else.. this pe has got the lock as no one else had it */
141+
}
142+
143+
void
144+
_shmem_mcs_clear_lock(long *lockp)
145+
{
146+
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
147+
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
148+
int *tail = &(lock->tail);
149+
int *next = &(lock->next);
150+
int my_pe = shmem_my_pe();
151+
int next_value = 0;
152+
int swap_cond = 0;
153+
int prev_value = 0;
154+
int tval = 0;
155+
int val_tmp = 0;
156+
int nmask = 0;
157+
int a_val = 0;
158+
int retv = 0;
159+
160+
/**
161+
* Can make atomic fetch to be safe in non-cc architectures
162+
* Has impact on performance
163+
*/
164+
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
165+
(void*)&next_value, tval, sizeof(int),
166+
my_pe));
167+
RUNTIME_CHECK_RC(retv);
168+
MCA_SPML_CALL(quiet(oshmem_ctx_default));
169+
170+
if (next_value == SHMEM_MCSL_NEXT_MASK) {
171+
swap_cond = SHMEM_MCSL_SET_SIGNAL(my_pe);
172+
retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
173+
(void *)tail, (uint64_t *)&(prev_value),
174+
OSHMEM_ATOMIC_PTR_2_INT(&swap_cond,
175+
sizeof(swap_cond)),
176+
OSHMEM_ATOMIC_PTR_2_INT(&val_tmp,
177+
sizeof(val_tmp)), sizeof(int),
178+
mcs_tail_owner));
179+
RUNTIME_CHECK_RC(retv);
180+
181+
/** I am the tail.. and lock is released */
182+
if (prev_value == swap_cond) {
183+
return;
184+
}
185+
/**
186+
* I am not the tail, another PE maybe racing to acquire lock,
187+
* let them complete setting themselves as our next
188+
*/
189+
nmask = SHMEM_MCSL_NEXT_MASK;
190+
while(next_value == nmask) {
191+
retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE,
192+
(void*)&nmask, SHMEM_INT));
193+
RUNTIME_CHECK_RC(retv);
194+
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
195+
(void*)&next_value, tval,
196+
sizeof(int), my_pe));
197+
RUNTIME_CHECK_RC(retv);
198+
}
199+
}
200+
/** There is a successor release lock to the successor */
201+
a_val = SHMEM_MCSL_SIGNAL_MASK;
202+
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default,
203+
(void *)next, a_val, sizeof(a_val),
204+
SHMEM_MCSL_NEXT(next_value)));
205+
RUNTIME_CHECK_RC(retv);
206+
MCA_SPML_CALL(quiet(oshmem_ctx_default));
207+
}
208+
209+
int
210+
_shmem_mcs_test_lock(long *lockp)
211+
{
212+
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
213+
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
214+
int new_tail_req = 0;
215+
int prev_tail = 0;
216+
int tmp_cond = 0;
217+
int *tail = &(lock->tail);
218+
int *next = &(lock->next);
219+
int my_pe = shmem_my_pe();
220+
int retv = 0;
221+
222+
/** Initializing next pointer to next mask */
223+
*next = SHMEM_MCSL_NEXT_MASK;
224+
225+
/** Signal for setting lock */
226+
new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);
227+
228+
/** Check if previously cleared before swapping */
229+
retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
230+
(void *)tail, (uint64_t *)&(prev_tail),
231+
OSHMEM_ATOMIC_PTR_2_INT(&tmp_cond,
232+
sizeof(tmp_cond)),
233+
OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
234+
sizeof(new_tail_req)),
235+
sizeof(int), mcs_tail_owner));
236+
RUNTIME_CHECK_RC(retv);
237+
238+
return (0 != prev_tail);
239+
}

oshmem/shmem/c/shmem_set_lock.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
/*
2+
* Copyright (c) 2023 NVIDIA Corporation.
3+
* All rights reserved.
24
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
35
* All rights reserved.
46
* Copyright (c) 2019 Research Organization for Information Science
@@ -18,6 +20,7 @@
1820
#include "oshmem/shmem/shmem_api_logger.h"
1921
#include "oshmem/runtime/runtime.h"
2022
#include "oshmem/shmem/shmem_lock.h"
23+
#include "oshmem/runtime/params.h"
2124

2225
#if OSHMEM_PROFILING
2326
#include "oshmem/include/pshmem.h"
@@ -27,5 +30,11 @@
2730

2831
void shmem_set_lock(volatile long *lock)
2932
{
30-
_shmem_set_lock((void *)lock, sizeof(long));
33+
if (oshmem_shmem_enable_mcs_locks) {
34+
SHMEM_API_VERBOSE(10, "Set Lock with MCS Lock implementation");
35+
_shmem_mcs_set_lock((long *)lock);
36+
} else {
37+
SHMEM_API_VERBOSE(10, "Set Lock with Ticket Lock implementation");
38+
_shmem_set_lock((void *)lock, sizeof(long));
39+
}
3140
}

oshmem/shmem/c/shmem_test_lock.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
/*
2+
* Copyright (c) 2023 NVIDIA Corporation.
3+
* All rights reserved.
24
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
35
* All rights reserved.
46
* Copyright (c) 2019 Research Organization for Information Science
@@ -18,6 +20,7 @@
1820
#include "oshmem/include/shmem.h"
1921
#include "oshmem/shmem/shmem_api_logger.h"
2022
#include "oshmem/runtime/runtime.h"
23+
#include "oshmem/runtime/params.h"
2124
#include "oshmem/shmem/shmem_lock.h"
2225

2326
#if OSHMEM_PROFILING
@@ -28,5 +31,11 @@
2831

2932
int shmem_test_lock(volatile long *lock)
3033
{
31-
return _shmem_test_lock((void *)lock, sizeof(long));
34+
if (oshmem_shmem_enable_mcs_locks) {
35+
SHMEM_API_VERBOSE(10, "Test lock using MCS Lock implementation");
36+
return _shmem_mcs_test_lock((long *)lock);
37+
} else {
38+
SHMEM_API_VERBOSE(10, "Test_lock using Ticket Lock implementation");
39+
return _shmem_test_lock((void *)lock, sizeof(long));
40+
}
3241
}

oshmem/shmem/shmem_lock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,8 @@ void _shmem_set_lock(void *lock, int lock_size);
2222
int _shmem_test_lock(void *lock, int lock_size);
2323
void _shmem_clear_lock(void *lock, int lock_size);
2424

25+
/* MCS queue-lock counterparts of the lock routines declared above. */
/** Acquire the MCS lock stored in *lock. */
void _shmem_mcs_set_lock(long *lock);
26+
/** Release the MCS lock stored in *lock. */
void _shmem_mcs_clear_lock(long *lock);
27+
/** Try to take the MCS lock stored in *lock; returns an int status. */
int _shmem_mcs_test_lock(long *lock);
2528

2629
#endif /*SHMEM_LOCK_H*/

0 commit comments

Comments
 (0)