Background information
Running the full mpi4py test suite in the Fedora package, I'm getting what looks like a new test failure.
What version of Open MPI are you using? (e.g., v3.0.5, v4.0.2, git branch name and hash, etc.)
openmpi-4.1.2-0.1.rc1.fc36.x86_64
Describe how Open MPI was installed (e.g., from a source/distribution tarball, from a git clone, from an operating system distribution package, etc.)
Fedora package
Please describe the system on which you are running
Fedora rawhide
x86_64 VM
Details of the problem
mpiexec -np 1 gdb python3
(gdb) run test/runtests.py -v --no-builddir --no-threads -k test_win.TestWinCreateDynamicWorld
[[email protected]] Python 3.10.0 (/usr/bin/python3)
[[email protected]] numpy 1.21.1 (/usr/lib64/python3.10/site-packages/numpy)
[[email protected]] MPI 3.1 (Open MPI 4.1.2)
[[email protected]] mpi4py 4.0.0.dev0 (/home/orion/rpmbuild/BUILDROOT/mpi4py-3.1.2-1.fc36.x86_64/usr/lib64/python3.10/site-packages/openmpi/mpi4py)
testAttachDetach (test_win.TestWinCreateDynamicWorld) ...
Thread 1 "python3" received signal SIGSEGV, Segmentation fault.
0x00007ffff4894c22 in ucp_mem_unmap_common (context=context@entry=0x555555c89580, memh=memh@entry=0x0) at core/ucp_mm.c:359
359 alloc_md_memh = UCT_MEM_HANDLE_NULL;
Missing separate debuginfos, use: dnf debuginfo-install python3-3.10.0-4.fc36.x86_64
(gdb) list
354 ucs_status_t status;
355
356 ucs_debug("unmapping buffer %p memh %p", memh->address, memh);
357
358 /* Unregister from all memory domains */
359 alloc_md_memh = UCT_MEM_HANDLE_NULL;
360 status = ucp_mem_rereg_mds(context, 0, NULL, 0, 0, memh->alloc_md, memh->mem_type,
361 &alloc_md_memh, memh->uct, &memh->md_map);
362 if (status != UCS_OK) {
363 goto out;
(gdb) print memh
$1 = (ucp_mem_h) 0x0
(gdb) up
#1 0x00007ffff4894d66 in ucp_mem_unmap (context=0x555555c89580, memh=0x0) at core/ucp_mm.c:486
486 status = ucp_mem_unmap_common(context, memh);
(gdb) up
#2 0x00007ffff46fc707 in ompi_osc_ucx_free (win=<optimized out>) at /usr/src/debug/openmpi-4.1.2-0.1.rc1.fc36.x86_64/ompi/mca/osc/ucx/osc_ucx_component.c:881
881 ucp_mem_unmap(mca_osc_ucx_component.ucp_context,
(gdb) list
876 module->comm->c_coll->coll_barrier_module);
877
878 /* MPI_Win_free should detach any memory attached to dynamic windows */
879 for (idx = 0; idx < module->state.dynamic_win_count; idx++) {
880 assert(module->local_dynamic_win_info[idx].refcnt == 1);
881 ucp_mem_unmap(mca_osc_ucx_component.ucp_context,
882 module->local_dynamic_win_info[idx].memh);
883 }
884 module->state.dynamic_win_count = 0;
885
(gdb) print mca_osc_ucx_component
$2 = {super = {osc_version = {mca_major_version = 2, mca_minor_version = 1, mca_release_version = 0, mca_project_name = "ompi", '\000' <repeats 11 times>, mca_project_major_version = 4, mca_project_minor_version = 1,
mca_project_release_version = 2, mca_type_name = "osc", '\000' <repeats 28 times>, mca_type_major_version = 3, mca_type_minor_version = 0, mca_type_release_version = 0, mca_component_name = "ucx", '\000' <repeats 60 times>,
mca_component_major_version = 4, mca_component_minor_version = 1, mca_component_release_version = 2, mca_open_component = 0x7ffff46f9b30 <component_open>, mca_close_component = 0x0, mca_query_component = 0x0,
mca_register_component_params = 0x7ffff46fa750 <component_register>, mca_component_flags = 0, reserved = '\000' <repeats 27 times>}, osc_data = {param_field = 0, reserved = '\000' <repeats 31 times>},
osc_init = 0x7ffff46fa830 <component_init>, osc_query = 0x7ffff46f9b40 <component_query>, osc_select = 0x7ffff46faa50 <component_select>, osc_finalize = 0x7ffff46fa860 <component_finalize>}, ucp_context = 0x555555c89580,
ucp_worker = 0x555555cd4290, enable_mpi_threads = false, requests = {super = {super = {obj_class = 0x7ffff6c18d20 <opal_free_list_t_class>, obj_reference_count = 1}, opal_lifo_head = {data = {counter = 0, item = 0x555555c60c00},
value = 1730765755773919950552821535866880}, opal_lifo_ghost = {super = {obj_class = 0x7ffff6c18d60 <opal_list_item_t_class>, obj_reference_count = 1}, opal_list_next = 0x7ffff47119a0 <mca_osc_ucx_component+352>,
opal_list_prev = 0x0, item_free = 1}}, fl_max_to_alloc = 0, fl_num_allocated = 8, fl_num_per_alloc = 8, fl_num_waiting = 0, fl_frag_size = 160, fl_frag_alignment = 128, fl_payload_buffer_size = 0,
fl_payload_buffer_alignment = 0, fl_frag_class = 0x7ffff47117c0 <ompi_osc_ucx_request_t_class>, fl_mpool = 0x7ffff6c19500 <mca_mpool_malloc_module>, fl_rcache = 0x0, fl_lock = {super = {
obj_class = 0x7ffff6c190e0 <opal_mutex_t_class>, obj_reference_count = 1}, m_lock_pthread = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0,
__next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, m_lock_atomic = {u = {lock = 0, sparc_lock = 0 '\000', padding = "\000\000\000"}}}, fl_condition = {super = {
obj_class = 0x7ffff6c190a0 <opal_condition_t_class>, obj_reference_count = 1}, c_waiting = 0, c_signaled = 0}, fl_allocations = {super = {obj_class = 0x7ffff6c19020 <opal_list_t_class>, obj_reference_count = 1},
opal_list_sentinel = {super = {obj_class = 0x0, obj_reference_count = 0}, opal_list_next = 0x555555c604c0, opal_list_prev = 0x555555c604c0, item_free = 0}, opal_list_length = 1}, fl_rcache_reg_flags = 17, item_init = 0x0,
ctx = 0x0}, env_initialized = true, num_incomplete_req_ops = 0, num_modules = 1, priority = 60}
(gdb) print module
$3 = (ompi_osc_ucx_module_t *) 0x555555e0e250
(gdb) print *module
$4 = {super = {osc_win_shared_query = 0x0, osc_win_attach = 0x7ffff4703c20 <ompi_osc_ucx_win_attach>, osc_win_detach = 0x7ffff4703f60 <ompi_osc_ucx_win_detach>, osc_free = 0x7ffff46fc5f0 <ompi_osc_ucx_free>,
osc_put = 0x7ffff4700000 <ompi_osc_ucx_put>, osc_get = 0x7ffff4700410 <ompi_osc_ucx_get>, osc_accumulate = 0x7ffff4700820 <ompi_osc_ucx_accumulate>, osc_compare_and_swap = 0x7ffff4702640 <ompi_osc_ucx_compare_and_swap>,
osc_fetch_and_op = 0x7ffff4702ab0 <ompi_osc_ucx_fetch_and_op>, osc_get_accumulate = 0x7ffff4701700 <ompi_osc_ucx_get_accumulate>, osc_rput = 0x7ffff4702fc0 <ompi_osc_ucx_rput>, osc_rget = 0x7ffff47035f0 <ompi_osc_ucx_rget>,
osc_raccumulate = 0x7ffff4701330 <ompi_osc_ucx_raccumulate>, osc_rget_accumulate = 0x7ffff4702260 <ompi_osc_ucx_rget_accumulate>, osc_fence = 0x7ffff46fc970 <ompi_osc_ucx_fence>, osc_start = 0x7ffff46fce00 <ompi_osc_ucx_start>,
osc_complete = 0x7ffff46fd220 <ompi_osc_ucx_complete>, osc_post = 0x7ffff46fde90 <ompi_osc_ucx_post>, osc_wait = 0x7ffff46fa110 <ompi_osc_ucx_wait>, osc_test = 0x7ffff46fa210 <ompi_osc_ucx_test>,
osc_lock = 0x7ffff46fe500 <ompi_osc_ucx_lock>, osc_unlock = 0x7ffff46fec50 <ompi_osc_ucx_unlock>, osc_lock_all = 0x7ffff46fd730 <ompi_osc_ucx_lock_all>, osc_unlock_all = 0x7ffff46fdbd0 <ompi_osc_ucx_unlock_all>,
osc_sync = 0x7ffff46fa580 <ompi_osc_ucx_sync>, osc_flush = 0x7ffff46fa340 <ompi_osc_ucx_flush>, osc_flush_all = 0x7ffff46fcb20 <ompi_osc_ucx_flush_all>, osc_flush_local = 0x7ffff46fa570 <ompi_osc_ucx_flush_local>,
osc_flush_local_all = 0x7ffff46fccb0 <ompi_osc_ucx_flush_local_all>}, comm = 0x5555558966c0, memh = 0x0, flavor = 3, size = 0, state_memh = 0x555555d2fd40, win_info_array = 0x555555d27130, state_info_array = 0x555555df1db0,
disp_unit = 1, disp_units = 0x0, state = {lock = 0, req_flag = 0, acc_lock = 0, complete_count = 0, post_index = 0, post_state = {0 <repeats 32 times>}, dynamic_win_count = 4, dynamic_wins = {{base = 93824995651200, size = 32,
rkey_buffer = "\003", '\000' <repeats 1022 times>}, {base = 93825000933552, size = 16, rkey_buffer = "\003", '\000' <repeats 1022 times>}, {base = 0, size = 0, rkey_buffer = '\000' <repeats 1023 times>}, {
base = 93824995651200, size = 32, rkey_buffer = "\003", '\000' <repeats 1022 times>}, {base = 93825000933552, size = 16, rkey_buffer = "\003", '\000' <repeats 1022 times>}, {base = 0, size = 0,
rkey_buffer = '\000' <repeats 1023 times>} <repeats 27 times>}}, local_dynamic_win_info = {{memh = 0x5555558964b0, refcnt = 2}, {memh = 0x555555896500, refcnt = 1}, {memh = 0x0, refcnt = 0}, {memh = 0x555555d47bb0,
refcnt = 2}, {memh = 0x5555558963e0, refcnt = 1}, {memh = 0x0, refcnt = 0} <repeats 27 times>}, epoch_type = {access = NONE_EPOCH, exposure = NONE_EPOCH}, start_group = 0x0, post_group = 0x0, outstanding_locks = {super = {
obj_class = 0x7ffff6c18fe0 <opal_hash_table_t_class>, obj_reference_count = 1}, ht_table = 0x555555e17230, ht_capacity = 31, ht_size = 0, ht_growth_trigger = 15, ht_density_numer = 1, ht_density_denom = 2, ht_growth_numer = 2,
ht_growth_denom = 1, ht_type_methods = 0x0}, pending_posts = {super = {obj_class = 0x7ffff6c19020 <opal_list_t_class>, obj_reference_count = 1}, opal_list_sentinel = {super = {obj_class = 0x0, obj_reference_count = 0},
opal_list_next = 0x555555e16920, opal_list_prev = 0x555555e16920, item_free = 0}, opal_list_length = 0}, lock_count = 0, post_count = 0, global_ops_num = 0, per_target_ops_nums = 0x555555e17210, req_result = 0,
start_grp_ranks = 0x0, lock_all_is_nocheck = false}
(gdb) print idx
$5 = 2
(gdb) print module->local_dynamic_win_info[2]
$6 = {memh = 0x0, refcnt = 0}
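Pulling the gdb output together: dynamic_win_count is 4, so the cleanup loop at osc_ucx_component.c:879 walks slots 0 through 3, but slot 2 holds {memh = 0x0, refcnt = 0} while slot 4 still contains a live entry. That NULL memh is passed straight to ucp_mem_unmap, which dereferences it in ucp_mem_unmap_common, hence the SIGSEGV. Note too that the assert(refcnt == 1) at line 880 would already have fired at idx 0 (refcnt = 2) in a build with asserts enabled. A toy model of the dumped state, just to make the failure mode concrete (illustrative Python, not OMPI code):

# Illustrative model of the state printed above; the names mirror the C
# fields but this is not OMPI code. The cleanup loop trusts
# dynamic_win_count, yet the attach/detach sequence has left a hole at
# slot 2 and a live entry at slot 4.
local_dynamic_win_info = [
    {"memh": 0x5555558964b0, "refcnt": 2},
    {"memh": 0x555555896500, "refcnt": 1},
    {"memh": 0x0,            "refcnt": 0},  # hole left behind by a detach
    {"memh": 0x555555d47bb0, "refcnt": 2},
    {"memh": 0x5555558963e0, "refcnt": 1},  # live entry beyond the count
]
dynamic_win_count = 4

for idx in range(dynamic_win_count):
    memh = local_dynamic_win_info[idx]["memh"]
    if memh == 0x0:
        # the real code calls ucp_mem_unmap(ctx, NULL) here and segfaults
        print(f"slot {idx}: ucp_mem_unmap would be handed a NULL memh")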
Steps to Reproduce
This is on Fedora rawhide with openmpi-4.1.2-0.1.rc1 and ucx-1.11.2-1, building mpi4py 3.1.2.
Setup and versions
- OS version (e.g. Linux distro) + CPU architecture (x86_64/aarch64/ppc64le/...)
cat /etc/issue or cat /etc/redhat-release + uname -a
Linux vmrawhide-rufous.cora.nwra.com 5.16.0-0.rc0.20211112git5833291ab6de.12.fc36.x86_64 #1 SMP PREEMPT Fri Nov 12 14:14:47 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
Additional information (depending on the issue)
$ ucx_info -d
#
# Memory domain: tcp
# Component: tcp
# register: unlimited, cost: 0 nsec
# remote key: 0 bytes
#
# Transport: tcp
# Device: lo
# System device: <unknown>
#
# capabilities:
# bandwidth: 11.91/ppn + 0.00 MB/sec
# latency: 10960 nsec
# overhead: 50000 nsec
# put_zcopy: <= 18446744073709551590, up to 6 iov
# put_opt_zcopy_align: <= 1
# put_align_mtu: <= 0
# am_short: <= 8K
# am_bcopy: <= 8K
# am_zcopy: <= 64K, up to 6 iov
# am_opt_zcopy_align: <= 1
# am_align_mtu: <= 0
# am header: <= 8037
# connection: to ep, to iface
# device priority: 1
# device num paths: 1
# max eps: 256
# device address: 18 bytes
# iface address: 2 bytes
# ep address: 10 bytes
# error handling: peer failure, ep_check, keepalive
#
# Transport: tcp
# Device: ens3
# System device: <unknown>
#
# capabilities:
# bandwidth: 11.32/ppn + 0.00 MB/sec
# latency: 10960 nsec
# overhead: 50000 nsec
# put_zcopy: <= 18446744073709551590, up to 6 iov
# put_opt_zcopy_align: <= 1
# put_align_mtu: <= 0
# am_short: <= 8K
# am_bcopy: <= 8K
# am_zcopy: <= 64K, up to 6 iov
# am_opt_zcopy_align: <= 1
# am_align_mtu: <= 0
# am header: <= 8037
# connection: to ep, to iface
# device priority: 0
# device num paths: 1
# max eps: 256
# device address: 6 bytes
# iface address: 2 bytes
# ep address: 10 bytes
# error handling: peer failure, ep_check, keepalive
#
#
# Connection manager: tcp
# max_conn_priv: 2064 bytes
#
# Memory domain: self
# Component: self
# register: unlimited, cost: 0 nsec
# remote key: 0 bytes
#
# Transport: self
# Device: memory0
# System device: <unknown>
#
# capabilities:
# bandwidth: 0.00/ppn + 6911.00 MB/sec
# latency: 0 nsec
# overhead: 10 nsec
# put_short: <= 4294967295
# put_bcopy: unlimited
# get_bcopy: unlimited
# am_short: <= 8K
# am_bcopy: <= 8K
# domain: cpu
# atomic_add: 32, 64 bit
# atomic_and: 32, 64 bit
# atomic_or: 32, 64 bit
# atomic_xor: 32, 64 bit
# atomic_fadd: 32, 64 bit
# atomic_fand: 32, 64 bit
# atomic_for: 32, 64 bit
# atomic_fxor: 32, 64 bit
# atomic_swap: 32, 64 bit
# atomic_cswap: 32, 64 bit
# connection: to iface
# device priority: 0
# device num paths: 1
# max eps: inf
# device address: 0 bytes
# iface address: 8 bytes
# error handling: ep_check
#
#
# Memory domain: sysv
# Component: sysv
# allocate: unlimited
# remote key: 12 bytes
# rkey_ptr is supported
#
# Transport: sysv
# Device: memory
# System device: <unknown>
#
# capabilities:
# bandwidth: 0.00/ppn + 12179.00 MB/sec
# latency: 80 nsec
# overhead: 10 nsec
# put_short: <= 4294967295
# put_bcopy: unlimited
# get_bcopy: unlimited
# am_short: <= 100
# am_bcopy: <= 8256
# domain: cpu
# atomic_add: 32, 64 bit
# atomic_and: 32, 64 bit
# atomic_or: 32, 64 bit
# atomic_xor: 32, 64 bit
# atomic_fadd: 32, 64 bit
# atomic_fand: 32, 64 bit
# atomic_for: 32, 64 bit
# atomic_fxor: 32, 64 bit
# atomic_swap: 32, 64 bit
# atomic_cswap: 32, 64 bit
# connection: to iface
# device priority: 0
# device num paths: 1
# max eps: inf
# device address: 8 bytes
# iface address: 8 bytes
# error handling: ep_check
#
#
# Memory domain: posix
# Component: posix
# allocate: unlimited
# remote key: 24 bytes
# rkey_ptr is supported
#
# Transport: posix
# Device: memory
# System device: <unknown>
#
# capabilities:
# bandwidth: 0.00/ppn + 12179.00 MB/sec
# latency: 80 nsec
# overhead: 10 nsec
# put_short: <= 4294967295
# put_bcopy: unlimited
# get_bcopy: unlimited
# am_short: <= 100
# am_bcopy: <= 8256
# domain: cpu
# atomic_add: 32, 64 bit
# atomic_and: 32, 64 bit
# atomic_or: 32, 64 bit
# atomic_xor: 32, 64 bit
# atomic_fadd: 32, 64 bit
# atomic_fand: 32, 64 bit
# atomic_for: 32, 64 bit
# atomic_fxor: 32, 64 bit
# atomic_swap: 32, 64 bit
# atomic_cswap: 32, 64 bit
# connection: to iface
# device priority: 0
# device num paths: 1
# max eps: inf
# device address: 8 bytes
# iface address: 8 bytes
# error handling: ep_check
#
valgrind doesn't report any other errors before this crash.
Reported to UCX here: openucx/ucx#7674, but this may be more of an Open MPI issue; I'm not really sure.
The test code looks to be:
class BaseTestWinCreateDynamic(BaseTestWin):

    CREATE_FLAVOR = MPI.WIN_FLAVOR_DYNAMIC

    def setUp(self):
        self.WIN = MPI.Win.Create_dynamic(self.INFO, self.COMM)

    def tearDown(self):
        self.WIN.Free()

    def testGetAttr(self):
        base = self.WIN.Get_attr(MPI.WIN_BASE)
        size = self.WIN.Get_attr(MPI.WIN_SIZE)
        self.assertEqual(base, 0)
        self.assertEqual(size, 0)

    def testMemory(self):
        memory = self.WIN.tomemory()
        base = MPI.Get_address(memory)
        size = len(memory)
        self.assertEqual(base, 0)
        self.assertEqual(size, 0)

    def testAttributes(self):
        base, size, _ = self.WIN.attrs
        self.assertEqual(base, 0)
        self.assertEqual(size, 0)

    @unittest.skipMPI('msmpi(<9.1.0)')
    def testAttachDetach(self):
        mem1 = MPI.Alloc_mem(8)
        mem2 = MPI.Alloc_mem(16)
        mem3 = MPI.Alloc_mem(32)
        for mem in (mem1, mem2, mem3):
            self.WIN.Attach(mem)
            self.testMemory()
            self.WIN.Detach(mem)
        for mem in (mem1, mem2, mem3):
            self.WIN.Attach(mem)
        self.testMemory()
        for mem in (mem1, mem2, mem3):
            self.WIN.Detach(mem)
        for mem in (mem1, mem2, mem3):
            self.WIN.Attach(mem)
        self.testMemory()
        for mem in (mem3, mem2, mem1):
            self.WIN.Detach(mem)
        MPI.Free_mem(mem1)
        MPI.Free_mem(mem2)
        MPI.Free_mem(mem3)
with COMM = MPI.COMM_WORLD (the TestWinCreateDynamicWorld case being run above).
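For convenience, here is the same attach/detach sequence distilled into a standalone script (a sketch: I have been reproducing via the full test suite as above; the testMemory attribute checks are omitted since they only read window attributes). Run with mpiexec -np 1 python3 repro.py:

# repro.py - distilled from test_win.py testAttachDetach (sketch, single rank)
from mpi4py import MPI

comm = MPI.COMM_WORLD
win = MPI.Win.Create_dynamic(MPI.INFO_NULL, comm)

mem1 = MPI.Alloc_mem(8)
mem2 = MPI.Alloc_mem(16)
mem3 = MPI.Alloc_mem(32)

# Attach and detach each buffer in turn
for mem in (mem1, mem2, mem3):
    win.Attach(mem)
    win.Detach(mem)
# Attach all, then detach all in the same order
for mem in (mem1, mem2, mem3):
    win.Attach(mem)
for mem in (mem1, mem2, mem3):
    win.Detach(mem)
# Attach all, then detach in reverse order
for mem in (mem1, mem2, mem3):
    win.Attach(mem)
for mem in (mem3, mem2, mem1):
    win.Detach(mem)

MPI.Free_mem(mem1)
MPI.Free_mem(mem2)
MPI.Free_mem(mem3)

win.Free()  # tearDown equivalent; the SIGSEGV fires here in ompi_osc_ucx_free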