Skip to content

Commit 78ac92a

Browse files
committed
OSHMEM/MCA/SPML/UCX: added support for team management functions
Signed-off-by: Roie Danino <[email protected]>
1 parent 120b71b commit 78ac92a

File tree

2 files changed

+180
-13
lines changed

2 files changed

+180
-13
lines changed

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 145 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,8 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
11631163
}
11641164
SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);
11651165
}
1166+
1167+
mca_spml_ucx_team_world_init();
11661168

11671169
(*ctx) = (shmem_ctx_t)ucx_ctx;
11681170
return OSHMEM_SUCCESS;
@@ -1180,6 +1182,9 @@ void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx)
11801182
if (!mca_spml_ucx.active_array.ctxs_count) {
11811183
opal_progress_unregister(spml_ucx_ctx_progress);
11821184
}
1185+
1186+
mca_spml_ucx_team_world_destroy();
1187+
11831188
SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);
11841189
}
11851190

@@ -1749,59 +1754,186 @@ size_t mca_spml_ucx_test_some_vector(void *ivars, int cmp,
17491754
RUNTIME_SHMEM_NOT_IMPLEMENTED_API_ABORT_RET_SIZE_T();
17501755
}
17511756

1757+
void mca_spml_ucx_team_world_init()
1758+
{
1759+
int rc = mca_spml_ucx_team_split_strided(NULL, 0, 1, oshmem_num_procs(), NULL, 0,
1760+
&SHMEM_TEAM_WORLD);
1761+
1762+
if (rc != OSHMEM_SUCCESS) {
1763+
SPML_UCX_ERROR("mca_spml_ucx_team_split_strided failed (SHMEM_TEAM_WORLD creation)");
1764+
oshmem_shmem_abort(-1);
1765+
}
1766+
}
1767+
1768+
void mca_spml_ucx_team_world_destroy()
1769+
{
1770+
if (SHMEM_TEAM_WORLD != NULL) {
1771+
mca_spml_ucx_team_destroy(SHMEM_TEAM_WORLD);
1772+
SHMEM_TEAM_WORLD = NULL;
1773+
}
1774+
}
1775+
17521776
/* This routine is not implemented */
17531777
int mca_spml_ucx_team_sync(shmem_team_t team)
17541778
{
17551779
return OSHMEM_ERR_NOT_IMPLEMENTED;
17561780
}
17571781

1758-
/* This routine is not implemented */
17591782
/**
 * Return the calling PE's index inside the given team.
 *
 * @param team  Team handle.
 *
 * @return The stored my_pe value of the team (SPML_UCX_PE_NOT_IN_TEAM when
 *         the team was created on a PE that is not a member), or
 *         OSHMEM_ERROR when team is SHMEM_TEAM_INVALID.
 */
int mca_spml_ucx_team_my_pe(shmem_team_t team)
{
    SPML_UCX_VALIDATE_TEAM(team);

    return ((mca_spml_ucx_team_t *)team)->my_pe;
}
17631789

1764-
/* This routine is not implemented */
17651790
/**
 * Return the number of PEs that belong to the given team.
 *
 * @param team  Team handle.
 *
 * @return The stored n_pes value of the team, or OSHMEM_ERROR when team is
 *         SHMEM_TEAM_INVALID.
 */
int mca_spml_ucx_team_n_pes(shmem_team_t team)
{
    SPML_UCX_VALIDATE_TEAM(team);

    return ((mca_spml_ucx_team_t *)team)->n_pes;
}
17691797

1770-
/* This routine is not implemented */
17711798
int mca_spml_ucx_team_get_config(shmem_team_t team, long config_mask,
17721799
shmem_team_config_t *config)
17731800
{
1774-
return OSHMEM_ERR_NOT_IMPLEMENTED;
1801+
mca_spml_ucx_team_t *ucx_team = (mca_spml_ucx_team_t *)team;
1802+
SPML_UCX_VALIDATE_TEAM(team);
1803+
1804+
memcpy(config, &ucx_team->config, sizeof(shmem_team_config_t));
1805+
1806+
return SHMEM_SUCCESS;
1807+
}
1808+
1809+
/**
 * Tell whether a PE index belongs to the strided set
 * { start, start + stride, ..., start + (size - 1) * stride }.
 *
 * The membership test is done on the offset from start (offset divisible by
 * stride and offset / stride < size) instead of computing
 * start + size * stride, which can overflow a signed int for large teams.
 *
 * @param src_pe  PE index to test, in the same index space as start.
 * @param start   First PE of the set.
 * @param stride  Distance between consecutive members; must be positive.
 * @param size    Number of members; must be positive.
 *
 * @return Non-zero if src_pe is a member, 0 otherwise (including when
 *         stride or size is not positive).
 */
static inline int mca_spml_ucx_is_pe_in_strided_team(int src_pe, int start,
                                                     int stride, int size)
{
    long long offset;

    if ((stride <= 0) || (size <= 0) || (src_pe < start)) {
        return 0;
    }

    /* Widen before subtracting so that extreme start values cannot overflow */
    offset = (long long)src_pe - (long long)start;

    return ((offset % stride) == 0) && ((offset / stride) < size);
}
17761815

1777-
/* This routine is not implemented */
17781816
/**
 * Translate a PE index of one team into the matching index of another team.
 *
 * Teams store their start and stride relative to the world team, so the
 * translation goes through the world PE number:
 *   world_pe = src.start + src_pe * src.stride.
 *
 * @param src_team   Team in which src_pe is expressed.
 * @param src_pe     PE index inside src_team.
 * @param dest_team  Team into which the PE is translated.
 *
 * @return The index of the PE inside dest_team, or SPML_UCX_PE_NOT_IN_TEAM
 *         when either team is SHMEM_TEAM_INVALID, when src_pe is not a valid
 *         index of src_team, or when the PE is not a member of dest_team.
 */
int mca_spml_ucx_team_translate_pe(shmem_team_t src_team, int src_pe,
                                   shmem_team_t dest_team)
{
    mca_spml_ucx_team_t *ucx_src_team  = (mca_spml_ucx_team_t *)src_team;
    mca_spml_ucx_team_t *ucx_dest_team = (mca_spml_ucx_team_t *)dest_team;
    int global_pe;

    /* Never dereference an invalid handle */
    if ((src_team == SHMEM_TEAM_INVALID) || (dest_team == SHMEM_TEAM_INVALID)) {
        return SPML_UCX_PE_NOT_IN_TEAM;
    }

    /* Also covers src_pe == SPML_UCX_PE_NOT_IN_TEAM */
    if ((src_pe < 0) || (src_pe >= ucx_src_team->n_pes)) {
        return SPML_UCX_PE_NOT_IN_TEAM;
    }

    if (src_team == dest_team) {
        return src_pe;
    }

    global_pe = ucx_src_team->start + src_pe * ucx_src_team->stride;

    if (dest_team == SHMEM_TEAM_WORLD) {
        return global_pe;
    }

    if (!mca_spml_ucx_is_pe_in_strided_team(global_pe, ucx_dest_team->start,
                                            ucx_dest_team->stride,
                                            ucx_dest_team->n_pes)) {
        return SPML_UCX_PE_NOT_IN_TEAM;
    }

    return (global_pe - ucx_dest_team->start) / ucx_dest_team->stride;
}
17831844

1784-
/* This routine is not implemented */
17851845
int mca_spml_ucx_team_split_strided(shmem_team_t parent_team, int start, int
17861846
stride, int size, const shmem_team_config_t *config, long config_mask,
17871847
shmem_team_t *new_team)
17881848
{
1789-
return OSHMEM_ERR_NOT_IMPLEMENTED;
1849+
mca_spml_ucx_team_t *ucx_parent_team;
1850+
mca_spml_ucx_team_t *ucx_new_team;
1851+
int my_pe;
1852+
int n_pes;
1853+
1854+
SPML_UCX_ASSERT(((start + size * stride) <= oshmem_num_procs()) && (start < size) && (stride > 0) && (size > 0));
1855+
1856+
if (parent_team == NULL) {
1857+
my_pe = shmem_my_pe();
1858+
} else {
1859+
ucx_parent_team = (mca_spml_ucx_team_t*) parent_team;
1860+
SPML_UCX_VALIDATE_TEAM(parent_team);
1861+
if (mca_spml_ucx_is_pe_in_strided_team(ucx_parent_team->my_pe, start, stride, size)) {
1862+
my_pe = (ucx_parent_team->my_pe - start) / stride;
1863+
} else {
1864+
/* not in team, according to spec it should be SHMEM_TEAM_INVALID but its value is NULL which
1865+
can be also interpreted as 0 (first pe), therefore -1 is used */
1866+
my_pe = SPML_UCX_PE_NOT_IN_TEAM;
1867+
}
1868+
}
1869+
1870+
ucx_new_team = (mca_spml_ucx_team_t *)malloc(sizeof(mca_spml_ucx_team_t));
1871+
ucx_new_team->n_pes = size;
1872+
ucx_new_team->my_pe = my_pe;
1873+
ucx_new_team->team_type = MCA_SPML_UCX_TEAM_TYPE_STRIDED;
1874+
1875+
/* In order to simplify pe translations start and stride are calculated with respect to
1876+
* world_team */
1877+
ucx_new_team->start = ucx_parent_team->start + start;
1878+
ucx_new_team->stride = ucx_parent_team->stride * stride;
1879+
ucx_new_team->config = calloc(1, sizeof(mca_spml_ucx_team_config_t));
1880+
1881+
if (config != NULL) {
1882+
memcpy(&ucx_new_team->config.super, config, sizeof(shmem_team_config_t));
1883+
}
1884+
1885+
ucx_new_team->config = config;
1886+
ucx_new_team->parent = parent_team;
1887+
1888+
*new_team = ucx_new_team
1889+
1890+
return OSHMEM_SUCCESS;
17901891
}
17911892

1792-
/* This routine is not implemented */
17931893
int mca_spml_ucx_team_split_2d(shmem_team_t parent_team, int xrange, const
17941894
shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t
17951895
*xaxis_team, const shmem_team_config_t *yaxis_config, long yaxis_mask,
17961896
shmem_team_t *yaxis_team)
17971897
{
1798-
return OSHMEM_ERR_NOT_IMPLEMENTED;
1898+
mca_spml_ucx_team_t *ucx_parent_team = (mca_spml_ucx_team_t*) parent_team;
1899+
int yrange = ucx_parent_team->n_pes / xrange;
1900+
int pe_x = ucx_parent_team->my_pe % xrange;
1901+
int pe_y = ucx_parent_team->my_pe / xrange;
1902+
int rc;
1903+
1904+
/* Create x-team of my_pe */
1905+
rc = mca_spml_ucx_team_split_strided(parent_team, pe_y * xrange, 1, xrange, xaxis_config, xaxis_mask, xaxis_team);
1906+
1907+
if (rc != OSHMEM_SUCCESS) {
1908+
SPML_UCX_ERROR("mca_spml_ucx_team_split_strided failed (x-axis team creation)");
1909+
return rc;
1910+
}
1911+
1912+
/* Create y-team of my_pe */
1913+
rc = mca_spml_ucx_team_split_strided(parent_team, pe_x, xrange, yrange, yaxis_config, yaxis_mask, yaxis_team);
1914+
if (rc != OSHMEM_SUCCESS) {
1915+
SPML_UCX_ERROR("mca_spml_ucx_team_split_strided failed (y-axis team creation)");
1916+
goto out_free_xaxis;
1917+
}
1918+
1919+
return OSHMEM_SUCCESS;
1920+
1921+
out_free_xaxis:
1922+
mca_spml_ucx_team_destroy(*xaxis_team);
1923+
return rc;
17991924
}
18001925

18011926
/* This routine is not implemented */
18021927
int mca_spml_ucx_team_destroy(shmem_team_t team)
18031928
{
1804-
return OSHMEM_ERR_NOT_IMPLEMENTED;
1929+
mca_spml_ucx_team_t *ucx_team = (mca_spml_ucx_team_t *)team;
1930+
1931+
SPML_UCX_VALIDATE_TEAM(team);
1932+
1933+
free(ucx_team->config);
1934+
free(team);
1935+
1936+
return OSHMEM_SUCCESS;
18051937
}
18061938

18071939
/* This routine is not implemented */

oshmem/mca/spml/ucx/spml_ucx.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,15 @@ BEGIN_C_DECLS
4848
#define SPML_UCX_TRANSP_IDX 0
#define SPML_UCX_TRANSP_CNT 1
#define SPML_UCX_SERVICE_SEG 0
/* my_pe value stored in a team on a PE that is not one of its members.
 * Parenthesized so the negative literal expands safely in any expression. */
#define SPML_UCX_PE_NOT_IN_TEAM (-1)
52+
53+
/*
 * Guard for team entry points: when _team equals SHMEM_TEAM_INVALID an error
 * naming the current function is logged and the ENCLOSING function returns
 * OSHMEM_ERROR. Only usable inside functions whose return type can carry
 * OSHMEM_ERROR.
 */
#define SPML_UCX_VALIDATE_TEAM(_team)                               \
    do {                                                            \
        if (OPAL_UNLIKELY(SHMEM_TEAM_INVALID == (_team))) {         \
            SPML_UCX_ERROR("Invalid team at %s", __func__);         \
            return OSHMEM_ERROR;                                    \
        }                                                           \
    } while (0)
5160

5261
enum {
5362
SPML_UCX_STRONG_ORDERING_NONE = 0, /* don't use strong ordering */
@@ -115,6 +124,29 @@ typedef struct mca_spml_ucx_ctx_array {
115124
mca_spml_ucx_ctx_t **ctxs;
116125
} mca_spml_ucx_ctx_array_t;
117126

127+
typedef struct mca_spml_ucx_team_config {
128+
shmem_team_config_t super;
129+
130+
} mca_spml_ucx_team_config_t;
131+
132+
/*
 * Kind of team, recorded in mca_spml_ucx_team_t::team_type.
 * MCA_SPML_UCX_TEAM_TYPE_LAST aliases the highest valid value.
 */
typedef enum {
    MCA_SPML_UCX_TEAM_TYPE_STRIDED = 0,
    MCA_SPML_UCX_TEAM_TYPE_2D_X    = 1,
    MCA_SPML_UCX_TEAM_TYPE_2D_Y    = 2,
    MCA_SPML_UCX_TEAM_TYPE_LAST    = MCA_SPML_UCX_TEAM_TYPE_2D_Y
} mca_spml_ucx_team_type_t;
138+
139+
typedef struct mca_spml_ucx_team {
140+
shmem_team_t super;
141+
mca_spml_ucx_team_type_t team_type;
142+
int n_pes;
143+
int my_pe;
144+
int stride;
145+
int start;
146+
mca_spml_ucx_team_config_t *config;
147+
mca_spml_ucx_team_t *parent_team;
148+
} mca_spml_ucx_team_t;
149+
118150
struct mca_spml_ucx {
119151
mca_spml_base_module_t super;
120152
ucp_context_h ucp_context;
@@ -300,6 +332,9 @@ mca_spml_ucx_mem_map_flags_symmetric_rkey(struct mca_spml_ucx *spml_ucx);
300332
extern void mca_spml_ucx_rkey_store_init(mca_spml_ucx_rkey_store_t *store);
301333
extern void mca_spml_ucx_rkey_store_cleanup(mca_spml_ucx_rkey_store_t *store);
302334

335+
/* Creation / release of SHMEM_TEAM_WORLD. Declared with (void) so that the
 * declarations are real prototypes and calls with arguments are rejected. */
void mca_spml_ucx_team_world_init(void);
void mca_spml_ucx_team_world_destroy(void);
337+
303338
static inline int
304339
mca_spml_ucx_peer_mkey_get(ucp_peer_t *ucp_peer, int index, spml_ucx_cached_mkey_t **out_rmkey)
305340
{

0 commit comments

Comments
 (0)