Skip to content

ALLOC_WITH_HINT: added inplace realloc - v4.0 #6707

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion oshmem/mca/spml/ucx/spml_ucx.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,8 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
{
ucp_worker_params_t params;
ucp_ep_params_t ep_params;
size_t i, j, nprocs = oshmem_num_procs();
size_t i, nprocs = oshmem_num_procs();
int j;
ucs_status_t err;
spml_ucx_mkey_t *ucx_mkey;
sshmem_mkey_t *mkey;
Expand Down
13 changes: 11 additions & 2 deletions oshmem/mca/sshmem/ucx/sshmem_ucx.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,19 @@ sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count);
void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator);
int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator,
unsigned count, unsigned *index);

/* Reallocate an existing allocated buffer. In-place reallocation is used
 * whenever it is possible.
 * Parameter 'inplace' - out: if zero, a new buffer was allocated (in-place
 * reallocation was not possible) and the user should free the original buffer
 * after the data is copied; otherwise (inplace != 0) no additional action is
 * required */
int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator,
unsigned count, unsigned old_index, unsigned *index,
int *inplace);
int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index);
size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index);
unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index);

END_C_DECLS

Expand Down
22 changes: 14 additions & 8 deletions oshmem/mca/sshmem/ucx/sshmem_ucx_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
uct_md_h uct_md;
void *address;
size_t length;
int ret;

uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
if (uct_md == NULL) {
Expand Down Expand Up @@ -336,7 +335,7 @@ static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr)
return ((char*)ptr - (char*)s->super.va_base) / ALLOC_ELEM_SIZE;
}

void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size)
static void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size)
{
const size_t count = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t);
uint64_t *dst64 = (uint64_t*)dst;
Expand All @@ -353,8 +352,9 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
void* old_ptr, void** new_ptr)
{
mca_sshmem_ucx_segment_context_t *ctx = s->context;
unsigned alloc_count, index;
unsigned alloc_count, index, old_index, old_alloc_count;
int res;
int inplace;

if (size > s->seg_size) {
return OSHMEM_ERR_OUT_OF_RESOURCE;
Expand All @@ -371,18 +371,24 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
/* Allocate new element. Zero-size allocation should still return a unique
* pointer, so allocate 1 byte */
alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1);
res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index);

if (!old_ptr) {
res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index);
} else {
old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr);
res = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, alloc_count,
old_index, &index, &inplace);
}

if (res != OSHMEM_SUCCESS) {
return res;
}

*new_ptr = sshmem_ucx_memheap_index2ptr(s, index);

/* Copy to new segment and release old*/
if (old_ptr) {
unsigned old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr);
unsigned old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator,
old_index);
if (old_ptr && !inplace) {
old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, old_index);
sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr,
min(size, old_alloc_count * ALLOC_ELEM_SIZE));
sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index);
Expand Down
66 changes: 64 additions & 2 deletions oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,68 @@ static void sshmem_ucx_shadow_merge_blocks(sshmem_ucx_shadow_allocator_t *alloca
}
}



/* Resize the allocated block starting at 'old_index' to 'count' elements.
 *
 * The block is resized in place when the size is unchanged, when it shrinks,
 * or when the directly following block is free and large enough to absorb
 * the growth. In those cases '*inplace' is set to 1 and '*index' to
 * 'old_index'. Otherwise '*inplace' is set to 0 and a fresh block is requested
 * through sshmem_ucx_shadow_alloc(), whose status is returned; the original
 * block is left untouched by this function in that case.
 *
 * Returns OSHMEM_SUCCESS on every in-place path. */
int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator,
                              unsigned count, unsigned old_index, unsigned *index,
                              int *inplace)
{
    sshmem_ucx_shadow_alloc_elem_t *block = &allocator->elems[old_index];
    sshmem_ucx_shadow_alloc_elem_t *limit;
    sshmem_ucx_shadow_alloc_elem_t *follower;
    unsigned cur_count = block->block_size;

    assert(count > 0);
    assert(!sshmem_ucx_shadow_is_free(block));

    if (count <= cur_count) {
        if (count < cur_count) {
            /* Shrinking: turn the tail of the block into a free block, then
             * let the allocator coalesce neighbouring free blocks */
            sshmem_ucx_shadow_set_elem(block + count,
                                       SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE,
                                       cur_count - count);
            block->block_size = count;
            sshmem_ucx_shadow_merge_blocks(allocator);
        }

        /* Same size or shrunk: the caller keeps the same index */
        *inplace = 1;
        *index   = old_index;
        return OSHMEM_SUCCESS;
    }

    assert(count > cur_count);

    limit    = allocator->elems + allocator->num_elems;
    follower = block + cur_count;

    /* Growing in place requires a following block that exists, is free and,
     * together with the current block, covers the requested size. The bounds
     * test comes first so 'follower' is only read when it is a valid element */
    if ((follower >= limit) ||
        !sshmem_ucx_shadow_is_free(follower) ||
        (cur_count + follower->block_size < count)) {
        *inplace = 0;
        return sshmem_ucx_shadow_alloc(allocator, count, index);
    }

    assert(block < follower);
    assert(block + count > follower);
    assert(block + count <= limit);
    assert(follower + follower->block_size <= limit);

    /* Whatever is left of the follower beyond the grown block stays free.
     * This must happen before the follower's header is cleared below, since
     * its block_size is still needed here */
    if (cur_count + follower->block_size > count) {
        sshmem_ucx_shadow_set_elem(block + count,
                                   SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE,
                                   cur_count + follower->block_size - count);
    }

    /* The follower's header is now interior to the grown block */
    sshmem_ucx_shadow_set_elem(follower, 0, 0);
    block->block_size = count;

    *inplace = 1;
    *index   = old_index;
    return OSHMEM_SUCCESS;
}

int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index)
{
Expand All @@ -117,8 +179,8 @@ int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator,
return OSHMEM_SUCCESS;
}

size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index)
unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index)
{
sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index];

Expand Down