diff --git a/.github/workflows/ci.yml b/.github/workflows/ci-ubuntu-22.04.yml
similarity index 61%
rename from .github/workflows/ci.yml
rename to .github/workflows/ci-ubuntu-22.04.yml
index 835133a0..650fe0df 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci-ubuntu-22.04.yml
@@ -1,12 +1,16 @@
-name: CI
+name: Ubuntu 22.04 CI

 on: push

 jobs:
   Build:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
       fail-fast: true
+      matrix:
+        mpi: [ 'mpich', 'impi' ]
+# mpi: [ 'mpich', 'openmpi', 'impi' ]
+# openmpi is broken on ubuntu-22.04 and above, see https://github.com/open-mpi/ompi/issues/10726

     env:
       FC: gfortran
@@ -18,11 +22,17 @@ jobs:

       - name: Install Dependencies
         run: |
-          sudo apt install -y gfortran-${GCC_V} cmake mpich
+          sudo apt install -y gfortran-${GCC_V} cmake
           sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-${GCC_V} 100

+      - name: Setup MPI ${{ matrix.mpi }}
+        uses: mpi4py/setup-mpi@v1
+        with:
+          mpi: ${{ matrix.mpi }}
+
       - name: Build and Test
         run: |
+          mpirun --version
           mkdir build
           cmake -S . -B build -Wdev -DCMAKE_INSTALL_PREFIX:PATH="${HOME}/OpenCoarrays" -DCMAKE_BUILD_TYPE:STRING="Debug" ..
           cmake --build build -j $(nproc)
@@ -30,3 +40,4 @@ jobs:
           ctest --test-dir build --output-on-failure --schedule-random --repeat-until-fail 1 --timeout 200
           cd build
           make uninstall
+          echo "Ran with mpi: ${{ matrix.mpi }}"
diff --git a/.github/workflows/ci-ubuntu-latest.yml b/.github/workflows/ci-ubuntu-latest.yml
new file mode 100644
index 00000000..fafb0dd4
--- /dev/null
+++ b/.github/workflows/ci-ubuntu-latest.yml
@@ -0,0 +1,45 @@
+name: Ubuntu latest CI
+
+on: push
+
+jobs:
+  Build:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+        mpi: [ 'impi' ]
+# mpi: [ 'mpich', 'openmpi', 'impi' ]
+# mpich is broken on ubuntu-24.04LTS and 24.10, see https://bugs.launchpad.net/ubuntu/+source/mpich/+bug/2072338
+# openmpi is broken on ubuntu-24.04LTS and 24.10, see https://github.com/open-mpi/ompi/issues/10726
+# Ubuntu 24.10 is not available as a GitHub runner, so we are left with Intel's MPI for now.
+
+    env:
+      FC: gfortran
+      GCC_V: 13
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install Dependencies
+        run: |
+          sudo apt install -y gfortran-${GCC_V} cmake
+          sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-${GCC_V} 100
+
+      - name: Setup MPI ${{ matrix.mpi }}
+        uses: mpi4py/setup-mpi@v1
+        with:
+          mpi: ${{ matrix.mpi }}
+
+      - name: Build and Test
+        run: |
+          mpirun --version
+          mkdir build
+          cmake -S . -B build -Wdev -DCMAKE_INSTALL_PREFIX:PATH="${HOME}/OpenCoarrays" -DCMAKE_BUILD_TYPE:STRING="Debug" ..
+          cmake --build build -j $(nproc)
+          cmake --build build -t install -j $(nproc) || echo "installation failed"
+          ctest --test-dir build --output-on-failure --schedule-random --repeat-until-fail 1 --timeout 200
+          cd build
+          make uninstall
+          echo "Ran with mpi: ${{ matrix.mpi }}"
diff --git a/.github/workflows/win-ci.yml b/.github/workflows/win-ci.yml
index 1741e922..07ca3d9a 100644
--- a/.github/workflows/win-ci.yml
+++ b/.github/workflows/win-ci.yml
@@ -42,11 +42,23 @@ jobs:
         ls "${I_MPI_ROOT}/bin"
         ls "${I_MPI_ROOT}"
         ls "${I_MPI_ROOT}/lib"
-        mpifc.bat -show
-        mpicc.bat -show
+        ls "${I_MPI_ROOT}/env"
+        # cat "${I_MPI_ROOT}/bin/mpifc.bat"
+        # mpifc.bat -show
+        # mpicc.bat -show
         mpifc.bat -version || echo "ifort not installed"
         mpicc.bat -version || echo "icc not installed"
         set +o verbose
+        # echo The following environment variables are used:
+        # echo CMPLR_ROOT Intel^(R^) Compiler installation directory path
+        # echo I_MPI_ROOT Intel^(R^) MPI Library installation directory path
+        # echo I_MPI_{FC,F77,F90} or MPICH_{FC,F77,F90}
+        # echo   the path/name of the underlying compiler to be used.
+        # echo I_MPI_{FC,F77,F90}_PROFILE or MPI{FC,F77,F90}_PROFILE
+        # echo   name of profile file ^(without extension^)
+        # echo I_MPI_COMPILER_CONFIG_DIR
+        # echo   folder which contains configuration files *.conf
+        # echo VT_ROOT Intel^(R^) Trace Collector installation directory path

       - name: Build and Test
         run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2b110d7..e19e743a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -734,10 +734,11 @@ function(add_caf_test name num_caf_img test_target)
     endif()
     # Add a host file for OMPI
     get_property(openmpi GLOBAL PROPERTY openmpi)
+    get_property(N_CPU GLOBAL PROPERTY N_CPU)
     if ( openmpi )
       set(test_parameters --hostfile ${CMAKE_BINARY_DIR}/hostfile)
     endif()
-    if ( ((N_CPU LESS num_caf_img) OR (N_CPU EQUAL 0)) )
+    if ( ( N_CPU LESS_EQUAL num_caf_img ) OR ( N_CPU EQUAL 0 ) )
       message(STATUS "Test ${name} is oversubscribed: ${num_caf_img} CAF images requested with ${N_CPU} system processor available.")
       if ( openmpi )
         if (min_test_imgs)
@@ -950,6 +951,8 @@ if(opencoarrays_aware_compiler)
     add_caf_test(issue-700-allow-multiple-scalar-dim-array-gets 2 issue-700-allow-multiple-scalar-dim-array-gets)
     add_caf_test(issue-762-mpi-crashing-on-exit 2 issue-762-mpi-crashing-on-exit)

+    add_caf_test(issue-654-send_by_ref_rank_2 3 issue-654-send_by_ref_rank_2)
+
     # IMAGE FAIL tests
     if(NOT CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 7.0.0)
       if(CAF_ENABLE_FAILED_IMAGES)
diff --git a/cmake/uninstall.cmake.in b/cmake/uninstall.cmake.in
index daeccc8d..0c4caa62 100644
--- a/cmake/uninstall.cmake.in
+++ b/cmake/uninstall.cmake.in
@@ -17,10 +17,10 @@ endforeach()
 foreach(file ${files})
   message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
   if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
-    exec_program(
-      "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
+    execute_process(
+      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
       OUTPUT_VARIABLE rm_out
-      RETURN_VALUE rm_retval
+      RESULT_VARIABLE rm_retval
     )
     if(NOT "${rm_retval}" STREQUAL 0)
       message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
diff --git a/src/runtime-libraries/mpi/CMakeLists.txt b/src/runtime-libraries/mpi/CMakeLists.txt
index 8155b17b..c74b5277 100644
--- a/src/runtime-libraries/mpi/CMakeLists.txt
+++ b/src/runtime-libraries/mpi/CMakeLists.txt
@@ -140,7 +140,7 @@ install(TARGETS opencoarrays_mod caf_mpi caf_mpi_static EXPORT OpenCoarraysTarge
 # Determine if we're using Open MPI
 #----------------------------------
 cmake_host_system_information(RESULT N_CPU QUERY NUMBER_OF_LOGICAL_CORES)
-set(N_CPU ${N_CPU} PARENT_SCOPE)
+set_property(GLOBAL PROPERTY N_CPU ${N_CPU})
 cmake_host_system_information(RESULT HOST_NAME QUERY HOSTNAME)
 set(HOST_NAME ${HOST_NAME} PARENT_SCOPE)
 execute_process(COMMAND ${MPIEXEC_EXECUTABLE} --version
diff --git a/src/runtime-libraries/mpi/mpi_caf.c b/src/runtime-libraries/mpi/mpi_caf.c
index 24518dd9..eb73b1a3 100644
--- a/src/runtime-libraries/mpi/mpi_caf.c
+++ b/src/runtime-libraries/mpi/mpi_caf.c
@@ -881,6 +881,9 @@ PREFIX(init)(int *argc, char ***argv)
   if (unlikely((ierr != MPI_SUCCESS)))
     caf_runtime_error("Failure when initializing MPI: %d", ierr);

+  ierr = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+  chk_err(ierr);
+
   /* Duplicate MPI_COMM_WORLD so that no CAF internal functions use it.
    * This is critical for MPI-interoperability. */
   rc = MPI_Comm_dup(MPI_COMM_WORLD, &CAF_COMM_WORLD);
@@ -4927,8 +4930,8 @@ PREFIX(get_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *dst,
                      src, datasize, MPI_BYTE, global_dynamic_win_rank,
                      MPI_Aint_add((MPI_Aint)remote_base_memptr, desc_offset),
                      datasize, MPI_BYTE, global_dynamic_win);
-      CAF_Win_unlock(global_dynamic_win_rank, global_dynamic_win);
       chk_err(ierr);
+      CAF_Win_unlock(global_dynamic_win_rank, global_dynamic_win);
     }
     else
     {
@@ -5482,7 +5485,7 @@ static void
 send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
              mpi_caf_token_t *mpi_token, gfc_descriptor_t *dst,
              gfc_descriptor_t *src, void *ds, void *sr,
-             ptrdiff_t dst_byte_offset, ptrdiff_t desc_byte_offset,
+             ptrdiff_t dst_byte_offset, void *rdesc, ptrdiff_t desc_byte_offset,
              int dst_kind, int src_kind, size_t dst_dim, size_t src_dim,
              size_t num, int *stat, int global_dynamic_win_rank,
             int memptr_win_rank,
@@ -5501,7 +5504,9 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
   size_t next_dst_dim, ref_rank;
   gfc_max_dim_descriptor_t dst_desc_data;
   caf_ref_type_t ref_type = ref->type;
-  caf_array_ref_t array_ref_src = ref->u.a.mode[src_dim];
+  caf_array_ref_t array_ref_dst = ref_type != CAF_REF_COMPONENT
+                                      ? ref->u.a.mode[dst_dim]
+                                      : CAF_ARR_REF_NONE;
   int ierr;

   if (unlikely(ref == NULL))
@@ -5511,10 +5516,13 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
     return;
   }

-  dprint("Entering send_for_ref: [i = %zd] src_index = %zd, "
-         "dst_offset = %zd, desc_offset = %zd, ds_glb = %d, desc_glb = %d\n",
-         *i, src_index, dst_byte_offset, desc_byte_offset, ds_global,
-         desc_global);
+  dprint("Entering send_for_ref: [i = %zd], %s, arr_ref_type: %s, src_index "
+         "= %zd, dst_offset = %zd, rdesc = %p, desc_offset = %zd, ds_glb = %d, "
+         "desc_glb "
+         "= %d, src_desc = %p, dst_desc = %p, ds = %p\n",
+         *i, caf_ref_type_str[ref_type], caf_array_ref_str[array_ref_dst],
+         src_index, dst_byte_offset, rdesc, desc_byte_offset, ds_global,
+         desc_global, src, dst, ds);

   if (ref->next == NULL)
   {
@@ -5581,7 +5589,7 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
         dst_type = ref->u.a.static_array_type;
       /* Intentionally fall through. */
     case CAF_REF_ARRAY:
-      if (array_ref_src == CAF_ARR_REF_NONE)
+      if (array_ref_dst == CAF_ARR_REF_NONE)
       {
         if (ds_global)
         {
@@ -5616,23 +5624,20 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
           caf_runtime_error(unreachable);
       }
   }

-  caf_array_ref_t array_ref_dst = ref->u.a.mode[dst_dim];
-#if 0
-  dprint("image_index = %d, num = %zd, src_dim = %zd, dst_dim = %zd, "
-         "ref_type = %s, array_ref_src = %s\n",
-         image_index, num, src_dim, dst_dim,
-         caf_ref_type_str[ref_type],
-         caf_array_ref_str[array_ref_src]);
-#endif
+  dprint("num = %zd, src_dim = %zd, dst_dim = %zd, "
+         "ref_mode = %s, array_ref_type = %s, ds = %p\n",
+         num, src_dim, dst_dim, caf_ref_type_str[ref_type],
+         caf_array_ref_str[array_ref_dst], ds);

   switch (ref_type)
   {
   case CAF_REF_COMPONENT:
+    dst_byte_offset += ref->u.c.offset;
     if (ref->u.c.caf_token_offset > 0)
     {
-      dst_byte_offset += ref->u.c.offset;
       desc_byte_offset = dst_byte_offset;
+      rdesc = ds;
       if (ds_global)
       {
         CAF_Win_lock(MPI_LOCK_SHARED, global_dynamic_win_rank,
@@ -5658,13 +5663,12 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
     }
     else
     {
-      dst_byte_offset += ref->u.c.offset;
       desc_byte_offset += ref->u.c.offset;
     }
     send_for_ref(ref->next, i, src_index, mpi_token, dst, src, ds, sr,
-                 dst_byte_offset, desc_byte_offset, dst_kind, src_kind,
-                 dst_dim, 0, 1, stat, global_dynamic_win_rank,
-                 memptr_win_rank, ds_global, desc_global
+                 dst_byte_offset, rdesc, desc_byte_offset, dst_kind, src_kind,
+                 0, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
+                 ds_global, desc_global
 #ifdef GCC_GE_8
                  ,
                  dst_type
@@ -5672,11 +5676,11 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
     );
     return;
   case CAF_REF_ARRAY:
-    if (array_ref_src == CAF_ARR_REF_NONE)
+    if (array_ref_dst == CAF_ARR_REF_NONE)
     {
       send_for_ref(ref->next, i, src_index, mpi_token, dst, src, ds, sr,
-                   dst_byte_offset, desc_byte_offset, dst_kind, src_kind,
-                   dst_dim, 0, 1, stat, global_dynamic_win_rank,
+                   dst_byte_offset, rdesc, desc_byte_offset, dst_kind,
+                   src_kind, dst_dim, 0, 1, stat, global_dynamic_win_rank,
                    memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
                    ,
@@ -5697,11 +5701,13 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       /* Get the remote descriptor. */
       if (desc_global)
       {
+        MPI_Aint disp = MPI_Aint_add((MPI_Aint)rdesc, desc_byte_offset);
+        dprint("remote desc fetch from %p, offset = %zd, aggreg. = %p\n",
+               rdesc, desc_byte_offset, disp);
         CAF_Win_lock(MPI_LOCK_SHARED, global_dynamic_win_rank,
                      global_dynamic_win);
         ierr = MPI_Get(&dst_desc_data, sizeof_desc_for_rank(ref_rank),
-                       MPI_BYTE, global_dynamic_win_rank,
-                       MPI_Aint_add((MPI_Aint)ds, desc_byte_offset),
+                       MPI_BYTE, global_dynamic_win_rank, disp,
                        sizeof_desc_for_rank(ref_rank), MPI_BYTE,
                        global_dynamic_win);
         chk_err(ierr);
@@ -5729,18 +5735,16 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       desc_byte_offset = 0;
 #ifdef EXTRA_DEBUG_OUTPUT
       dprint("remote desc rank: %zd (ref_rank: %zd)\n",
-             GFC_DESCRIPTOR_RANK(src), ref_rank);
-      for (int r = 0; r < GFC_DESCRIPTOR_RANK(src); ++r)
+             GFC_DESCRIPTOR_RANK(dst), ref_rank);
+      for (int r = 0; r < GFC_DESCRIPTOR_RANK(dst); ++r)
       {
-        dprint("remote desc dim[%d] = (lb=%zd, ub=%zd, stride=%zd)\n", r,
-               src->dim[r].lower_bound, src->dim[r]._ubound,
-               src->dim[r]._stride);
+        dprint("remote desc dim[%d] = (lb=%zd, ub=%zd, stride=%zd), "
+               "ref-type: %s\n",
+               r, dst->dim[r].lower_bound, dst->dim[r]._ubound,
+               dst->dim[r]._stride, caf_array_ref_str[ref->u.a.mode[r]]);
       }
 #endif
     }
-    dprint("array_ref_dst[%zd] = %s := array_ref_src[%zd] = %s", dst_dim,
-           caf_array_ref_str[array_ref_dst], src_dim,
-           caf_array_ref_str[array_ref_src]);
     switch (array_ref_dst)
     {
     case CAF_ARR_REF_VECTOR:
@@ -5771,15 +5775,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
 #undef KINDCASE

       dprint("vector-index computed to: %zd\n", array_offset_dst);
-      send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
-                   dst_byte_offset + array_offset_dst * ref->item_size,
-                   desc_byte_offset + array_offset_dst * ref->item_size,
-                   dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                   global_dynamic_win_rank, memptr_win_rank, ds_global,
-                   desc_global
+      send_for_ref(
+          ref, i, src_index, mpi_token, dst, src, ds, sr,
+          dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+          desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+          src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+          global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                   ,
-                   dst_type
+          ,
+          dst_type
 #endif
       );
       src_index += dst->dim[dst_dim]._stride;
@@ -5799,20 +5803,19 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       for (ptrdiff_t idx = 0; idx < extent_dst;
            ++idx, array_offset_dst += dst_stride)
       {
-        send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
-                     dst_byte_offset + array_offset_dst * ref->item_size,
-                     desc_byte_offset + array_offset_dst * ref->item_size,
-                     dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                     global_dynamic_win_rank, memptr_win_rank, ds_global,
-                     desc_global
+        send_for_ref(
+            ref, i, src_index, mpi_token, dst, src, ds, sr,
+            dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+            desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+            src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+            global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                     ,
-                     dst_type
+            ,
+            dst_type
 #endif
         );
         src_index += src_stride;
       }
-      // dprint("CAF_ARR_REF_FULL: return, i = %zd\n", *i);
       return;
     case CAF_ARR_REF_RANGE:
@@ -5836,46 +5839,38 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
                          : dst_dim;
       for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
       {
-        send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
-                     dst_byte_offset + array_offset_dst * ref->item_size,
-                     desc_byte_offset + array_offset_dst * ref->item_size,
-                     dst_kind, src_kind, next_dst_dim, src_dim + 1, 1, stat,
-                     global_dynamic_win_rank, memptr_win_rank, ds_global,
-                     desc_global
+        send_for_ref(
+            ref, i, src_index, mpi_token, dst, src, ds, sr,
+            dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+            desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+            src_kind, next_dst_dim, src_dim + 1, 1, stat,
+            global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                     ,
-                     dst_type
+            ,
+            dst_type
 #endif
         );
         src_index += src_stride;
         array_offset_dst += dst_stride;
       }
-      // dprint("CAF_ARR_REF_RANGE: return, i = %zd\n", *i);
       return;
     case CAF_ARR_REF_SINGLE:
       array_offset_dst = (ref->u.a.dim[dst_dim].s.start
                           - dst->dim[dst_dim].lower_bound)
                          * dst->dim[dst_dim]._stride;
-      // FIXME: issue #552
-      // next_dst_dim = (
-      //     (extent_dst > 1) ||
-      //     (GFC_DESCRIPTOR_EXTENT(src, src_dim) == 1 && extent_dst == 1)
-      // ) ? (dst_dim + 1) : dst_dim;
-      next_dst_dim = dst_dim;
-      send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
-                   dst_byte_offset + array_offset_dst * ref->item_size,
-                   desc_byte_offset + array_offset_dst * ref->item_size,
-                   dst_kind, src_kind, next_dst_dim, src_dim + 1, 1, stat,
-                   global_dynamic_win_rank, memptr_win_rank, ds_global,
-                   desc_global
+      send_for_ref(
+          ref, i, src_index, mpi_token, dst, src, ds, sr,
+          dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+          desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+          src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+          global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                   ,
-                   dst_type
+          ,
+          dst_type
 #endif
       );
-      // dprint("CAF_ARR_REF_SINGLE: return, i = %zd\n", *i);
       return;
     case CAF_ARR_REF_OPEN_END:
       COMPUTE_NUM_ITEMS(extent_dst, ref->u.a.dim[dst_dim].s.stride,
@@ -5890,15 +5885,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
                          * dst->dim[dst_dim]._stride;
       for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
       {
-        send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
-                     dst_byte_offset + array_offset_dst * ref->item_size,
-                     desc_byte_offset + array_offset_dst * ref->item_size,
-                     dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                     global_dynamic_win_rank, memptr_win_rank, ds_global,
-                     desc_global
+        send_for_ref(
+            ref, i, src_index, mpi_token, dst, src, ds, sr,
+            dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+            desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+            src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+            global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                     ,
-                     dst_type
+            ,
+            dst_type
 #endif
         );
         src_index += src_stride;
@@ -5916,15 +5911,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       array_offset_dst = 0;
       for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
       {
-        send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
-                     dst_byte_offset + array_offset_dst * ref->item_size,
-                     desc_byte_offset + array_offset_dst * ref->item_size,
-                     dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                     global_dynamic_win_rank, memptr_win_rank, ds_global,
-                     desc_global
+        send_for_ref(
+            ref, i, src_index, mpi_token, dst, src, ds, sr,
+            dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+            desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+            src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+            global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                     ,
-                     dst_type
+            ,
+            dst_type
 #endif
         );
         src_index += src_stride;
@@ -5938,9 +5933,9 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
   case CAF_REF_STATIC_ARRAY:
     if (array_ref_dst == CAF_ARR_REF_NONE)
     {
-      send_for_ref(ref->next, i, src_index, mpi_token, dst, NULL, ds, sr,
-                   dst_byte_offset, desc_byte_offset, dst_kind, src_kind,
-                   dst_dim, 0, 1, stat, global_dynamic_win_rank,
+      send_for_ref(ref->next, i, src_index, mpi_token, dst, src, ds, sr,
+                   dst_byte_offset, rdesc, desc_byte_offset, dst_kind,
+                   src_kind, dst_dim, 0, 1, stat, global_dynamic_win_rank,
                    memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
                    ,
@@ -5975,15 +5970,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       }
 #undef KINDCASE
-      send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
-                   dst_byte_offset + array_offset_dst * ref->item_size,
-                   desc_byte_offset + array_offset_dst * ref->item_size,
-                   dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                   global_dynamic_win_rank, memptr_win_rank, ds_global,
-                   desc_global
+      send_for_ref(
+          ref, i, src_index, mpi_token, dst, src, ds, sr,
+          dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+          desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+          src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+          global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                   ,
-                   dst_type
+          ,
+          dst_type
 #endif
       );
       src_index += src->dim[src_dim]._stride;
@@ -5996,15 +5991,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
            array_offset_dst <= ref->u.a.dim[dst_dim].s.end;
            array_offset_dst += ref->u.a.dim[dst_dim].s.stride)
       {
-        send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
-                     dst_byte_offset + array_offset_dst * ref->item_size,
-                     desc_byte_offset + array_offset_dst * ref->item_size,
-                     dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                     global_dynamic_win_rank, memptr_win_rank, ds_global,
-                     desc_global
+        send_for_ref(
+            ref, i, src_index, mpi_token, dst, src, ds, sr,
+            dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+            desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+            src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+            global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                     ,
-                     dst_type
+            ,
+            dst_type
 #endif
         );
         src_index += src_stride;
@@ -6019,15 +6014,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       array_offset_dst = ref->u.a.dim[dst_dim].s.start;
       for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
       {
-        send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
-                     dst_byte_offset + array_offset_dst * ref->item_size,
-                     desc_byte_offset + array_offset_dst * ref->item_size,
-                     dst_kind, src_kind, dst_dim + 1, src_dim + 1, 1, stat,
-                     global_dynamic_win_rank, memptr_win_rank, ds_global,
-                     desc_global
+        send_for_ref(
+            ref, i, src_index, mpi_token, dst, src, ds, sr,
+            dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+            desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+            src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+            global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                     ,
-                     dst_type
+            ,
+            dst_type
 #endif
         );
         src_index += src_stride;
@@ -6036,15 +6031,15 @@ send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
       return;
     case CAF_ARR_REF_SINGLE:
       array_offset_dst = ref->u.a.dim[dst_dim].s.start;
-      send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
-                   dst_byte_offset + array_offset_dst * ref->item_size,
-                   desc_byte_offset + array_offset_dst * ref->item_size,
-                   dst_kind, src_kind, dst_dim, src_dim + 1, 1, stat,
-                   global_dynamic_win_rank, memptr_win_rank, ds_global,
-                   desc_global
+      send_for_ref(
+          ref, i, src_index, mpi_token, dst, src, ds, sr,
+          dst_byte_offset + array_offset_dst * ref->item_size, rdesc,
+          desc_byte_offset + array_offset_dst * ref->item_size, dst_kind,
+          src_kind, dst_dim + 1, src_dim + 1, 1, stat,
+          global_dynamic_win_rank, memptr_win_rank, ds_global, desc_global
 #ifdef GCC_GE_8
-                   ,
-                   dst_type
+          ,
+          dst_type
 #endif
       );
       return;
@@ -6111,6 +6106,9 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
   bool access_desc_through_global_win = false;
   bool free_temp_src = false;
   caf_array_ref_t array_ref;
+#ifdef EXTRA_DEBUG_OUTPUT
+  bool desc_seen = false;
+#endif

   if (stat)
     *stat = 0;
@@ -6152,17 +6150,22 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
   size = 1;
   while (riter)
   {
-    dprint("remote_image = %d, offset = %zd, remote_mem = %p\n",
-           global_dynamic_win_rank, data_offset, remote_memptr);
+    dprint("remote_image = %d, offset = %zd, remote_mem = %p, ref: %s, "
+           "global_win(data, desc) = (%d, %d)\n",
+           global_dynamic_win_rank, data_offset, remote_memptr,
+           caf_ref_type_str[riter->type], access_data_through_global_win,
+           access_desc_through_global_win);
     switch (riter->type)
     {
     case CAF_REF_COMPONENT:
+      data_offset += riter->u.c.offset;
      if (riter->u.c.caf_token_offset > 0)
      {
+        remote_base_memptr = remote_memptr;
        if (access_data_through_global_win)
        {
-          data_offset += riter->u.c.offset;
-          remote_base_memptr = remote_memptr;
+          dprint("remote_memptr(old) = %p, offset = %zd\n",
+                 remote_base_memptr, data_offset);
          CAF_Win_lock(MPI_LOCK_SHARED, global_dynamic_win_rank,
                       global_dynamic_win);
          ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE,
@@ -6170,6 +6173,7 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
                         MPI_Aint_add((MPI_Aint)remote_memptr, data_offset),
                         stdptr_size, MPI_BYTE, global_dynamic_win);
          CAF_Win_unlock(global_dynamic_win_rank, global_dynamic_win);
+          dprint("remote_memptr(new) = %p\n", remote_memptr);
          chk_err(ierr);
          /* On the second indirection access also the remote descriptor
           * using the global window. */
@@ -6177,7 +6181,8 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
        }
        else
        {
-          data_offset += riter->u.c.offset;
+          dprint("remote_memptr(old) = %p, offset = %zd\n",
+                 remote_base_memptr, data_offset);
          CAF_Win_lock(MPI_LOCK_SHARED, memptr_win_rank,
                       mpi_token->memptr_win);
          ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE,
@@ -6193,9 +6198,9 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
      }
      else
      {
-        data_offset += riter->u.c.offset;
        desc_offset += riter->u.c.offset;
      }
+      dprint("comp-ref done.\n");
      break;
    case CAF_REF_ARRAY:
      /* When there has been no CAF_REF_COMP before hand, then the descriptor
@@ -6212,8 +6217,9 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
        dst = (gfc_descriptor_t *)&dst_desc;
        if (access_desc_through_global_win)
        {
-          dprint("remote desc fetch from %p, offset = %zd\n",
-                 remote_base_memptr, desc_offset);
+          dprint("remote desc fetch from %p, offset = %zd, aggreg = %p\n",
+                 remote_base_memptr, desc_offset,
+                 remote_base_memptr + desc_offset);
          CAF_Win_lock(MPI_LOCK_SHARED, global_dynamic_win_rank,
                       global_dynamic_win);
          ierr = MPI_Get(
@@ -6242,6 +6248,7 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
      else
        dst = mpi_token->desc;
 #ifdef EXTRA_DEBUG_OUTPUT
+      desc_seen = true;
      dprint("remote desc rank: %zd (ref_rank: %zd)\n",
             GFC_DESCRIPTOR_RANK(dst), ref_rank);
      for (i = 0; i < GFC_DESCRIPTOR_RANK(dst); ++i)
@@ -6390,8 +6397,7 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
        delta = riter->u.a.dim[i].v.nvec;
 #define KINDCASE(kind, type)                                                 \
   case kind:                                                                 \
-    remote_memptr                                                            \
-        += ((type *)riter->u.a.dim[i].v.vector)[0] * riter->item_size;       \
+    data_offset += ((type *)riter->u.a.dim[i].v.vector)[0] * riter->item_size; \
    break
        switch (riter->u.a.dim[i].v.kind)
@@ -6418,13 +6424,12 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
        COMPUTE_NUM_ITEMS(delta, riter->u.a.dim[i].s.stride,
                          riter->u.a.dim[i].s.start, riter->u.a.dim[i].s.end);
-        remote_memptr += riter->u.a.dim[i].s.start
-                         * riter->u.a.dim[i].s.stride * riter->item_size;
+        data_offset += riter->u.a.dim[i].s.start
+                       * riter->u.a.dim[i].s.stride * riter->item_size;
        break;
      case CAF_ARR_REF_SINGLE:
        delta = 1;
-        remote_memptr += riter->u.a.dim[i].s.start
-                         * riter->u.a.dim[i].s.stride * riter->item_size;
+        data_offset += riter->u.a.dim[i].s.start * riter->item_size;
        break;
      case CAF_ARR_REF_OPEN_END:
        /* This and OPEN_START are mapped to a RANGE and therefore
@@ -6434,8 +6439,6 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
        caf_internal_error(unknownarrreftype, stat, NULL, 0);
        return;
      } // switch
-      dprint("i = %zd, array_ref = %s, delta = %ld\n", i,
-             caf_array_ref_str[array_ref], delta);
      if (delta <= 0)
        return;
      if (dst != NULL)
@@ -6473,7 +6476,8 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
            return;
          }
        }
-        if (src_cur_dim < GFC_DESCRIPTOR_RANK(src))
+        if (src_cur_dim < GFC_DESCRIPTOR_RANK(src)
+            && array_ref != CAF_ARR_REF_SINGLE)
          ++src_cur_dim;
      }
      size *= (ptrdiff_t)delta;
@@ -6505,11 +6509,14 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
   remote_memptr = mpi_token->memptr;
   src_index = 0;
 #ifdef EXTRA_DEBUG_OUTPUT
-  dprint("src_rank: %zd\n", GFC_DESCRIPTOR_RANK(src));
-  for (i = 0; i < GFC_DESCRIPTOR_RANK(src); ++i)
+  if (desc_seen)
   {
-    dprint("src_dim[%zd] = (%zd, %zd)\n", i, src->dim[i].lower_bound,
-           src->dim[i]._ubound);
+    dprint("dst_rank: %zd\n", GFC_DESCRIPTOR_RANK(dst));
+    for (i = 0; i < GFC_DESCRIPTOR_RANK(dst); ++i)
+    {
+      dprint("dst_dim[%zd] = (%zd, %zd)\n", i, dst->dim[i].lower_bound,
+             dst->dim[i]._ubound);
+    }
   }
 #endif
   /* When accessing myself and may_require_tmp is set, then copy the source
@@ -6544,8 +6551,9 @@ PREFIX(send_by_ref)(caf_token_t token, int image_index, gfc_descriptor_t *src,
          "dst_size = %zd\n", size, dst_size);
   send_for_ref(refs, &i, src_index, mpi_token, mpi_token->desc, src,
-               remote_memptr, src->base_addr, 0, 0, dst_kind, src_kind, 0, 0, 1,
-               stat, global_dynamic_win_rank, memptr_win_rank, false, false
+               remote_memptr, src->base_addr, 0, NULL, 0, dst_kind, src_kind, 0,
+               0, 1, stat, global_dynamic_win_rank, memptr_win_rank, false,
+               false
 #ifdef GCC_GE_8
                ,
                dst_type
@@ -6972,8 +6980,8 @@ PREFIX(sendget_by_ref)(caf_token_t dst_token, int dst_image_index,
   send_for_ref(dst_refs, &i, src_index, dst_mpi_token, dst_mpi_token->desc,
                (gfc_descriptor_t *)&temp_src_desc, dst_mpi_token->memptr,
-               temp_src_desc.base.base_addr, 0, 0, dst_kind, src_kind, 0, 0, 1,
-               dst_stat, global_dst_rank, memptr_dst_rank, false, false
+               temp_src_desc.base.base_addr, 0, NULL, 0, dst_kind, src_kind, 0,
+               0, 1, dst_stat, global_dst_rank, memptr_dst_rank, false, false
 #ifdef GCC_GE_8
                ,
                dst_type
diff --git a/src/tests/regression/reported/CMakeLists.txt b/src/tests/regression/reported/CMakeLists.txt
index a057943f..ee2d72ad 100644
--- a/src/tests/regression/reported/CMakeLists.txt
+++ b/src/tests/regression/reported/CMakeLists.txt
@@ -12,6 +12,7 @@ caf_compile_executable(issue-503-non-contig-red-ndarray issue-503-non-contig-red
 caf_compile_executable(issue-322-non-coarray-vector-idx-lhs issue-322-non-coarray-vector-idx-lhs.f90)
 caf_compile_executable(issue-552-send_by_ref-singleton issue-552-send_by_ref-singleton.f90)
 caf_compile_executable(issue-511-incorrect-shape issue-511-incorrect-shape.f90)
+caf_compile_executable(issue-654-send_by_ref_rank_2 issue-654-send_by_ref_rank_2.f90)
 caf_compile_executable(issue-700-allow-multiple-scalar-dim-array-gets issue-700-allow-multiple-scalar-dim-array-gets.f90)
 if (gfortran_compiler AND (NOT CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 7.0.0))
   caf_compile_executable(issue-515-mimic-mpi-gatherv issue-515-mimic-mpi-gatherv.f90)
diff --git a/src/tests/regression/reported/issue-654-send_by_ref_rank_2.f90 b/src/tests/regression/reported/issue-654-send_by_ref_rank_2.f90
new file mode 100644
index 00000000..e7e4b748
--- /dev/null
+++ b/src/tests/regression/reported/issue-654-send_by_ref_rank_2.f90
@@ -0,0 +1,58 @@
+! Check that transferring data from one remote image to a different one works.
+
+program test_sendget_by_ref
+  implicit none
+  type :: rank1_type
+    integer, allocatable :: A(:)
+  end type
+  type :: rank2_type
+    integer, allocatable :: A(:,:)
+  end type
+  type(rank1_type) :: R_get[*]
+  type(rank2_type) :: R_send[*]
+  integer :: i, j
+  logical :: res = .True.
+
+  allocate(R_get%A(this_image()), source=-1)
+  R_get%A(this_image()) = this_image()
+
+  allocate(R_send%A(num_images(),num_images()), source=-2)
+
+  sync all
+
+  ! Get the j-th element of image j's R_get and send it to image i,
+  ! storing it in row j of column this_image(): a sendget_by_ref.
+  do i = 1, num_images()
+    do j = 1, num_images()
+      R_send[i]%A(j,this_image()) = R_get[j]%A(j)
+    end do
+  end do
+
+  sync all
+
+  ! Check that each image has the correct data in its R_send array,
+  ! which is the receiving side here.
+  do i = 1, num_images()
+    if (any(R_send%A(:,i) /= (/(j, j = 1, num_images())/))) res = .False.
+  end do
+
+  ! Reduce the result. both() is just a logical .and., needed because
+  ! there is no predefined reduction operator for logicals.
+  call co_reduce(res, both)
+  write(*,*) this_image(), ':', R_get%A, '|', R_send%A
+
+  if (this_image() == 1) then
+    if (res) then
+      write(*,*) "Test passed."
+    else
+      write(*,*) "Test failed."
+    end if
+  end if
+contains
+
+  pure function both(lhs, rhs) result(res)
+    logical, intent(in) :: lhs, rhs
+    logical :: res
+    res = lhs .AND. rhs
+  end function
+end program test_sendget_by_ref
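
The new regression test drives send_by_ref and sendget_by_ref through a rank-2 allocatable component, the access pattern reported in issue #654. A minimal sketch of that pattern, reduced to a plain remote write (a hypothetical standalone program, not part of this patch):

! Hypothetical sketch: one scalar index plus one full dimension on a
! rank-2 allocatable component takes the send_by_ref path whose remote
! descriptor handling (the new rdesc argument) this patch repairs.
program sketch_issue_654
  implicit none
  type :: rank2_type
    integer, allocatable :: A(:,:)
  end type
  type(rank2_type) :: R[*]

  allocate(R%A(num_images(), num_images()), source=0)
  sync all
  ! Image 1 writes one full row of image 2's component.
  if (this_image() == 1 .and. num_images() > 1) R[2]%A(1,:) = 42
  sync all
  if (this_image() == 2) print *, R%A(1,:)   ! expect a row of 42s
end program sketch_issue_654
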
diff --git a/src/tests/unit/send-get/get_static_array.f90 b/src/tests/unit/send-get/get_static_array.f90
index 5721b3d0..59aceecb 100644
--- a/src/tests/unit/send-get/get_static_array.f90
+++ b/src/tests/unit/send-get/get_static_array.f90
@@ -1,5 +1,6 @@
 program get_static_array
   type :: container
+    real :: f
     integer, allocatable :: stuff(:)
   end type

@@ -20,4 +21,3 @@ program get_static_array
     end if
   end if
 end program
-
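
The extra real :: f component in get_static_array.f90 presumably shifts the allocatable stuff component to a nonzero byte offset inside container, so the existing get test now also exercises the component-offset arithmetic (ref->u.c.offset) that this patch reworks. A hypothetical standalone reduction of that scenario:

! Hypothetical sketch: the leading component displaces the allocatable
! component, so the runtime must add the component's offset before
! chasing its descriptor on the remote image.
program sketch_component_offset
  implicit none
  type :: container
    real :: f                         ! pushes stuff to a nonzero offset
    integer, allocatable :: stuff(:)
  end type
  type(container) :: co[*]

  allocate(co%stuff(4))
  co%stuff = this_image()
  sync all
  ! A get_by_ref through the offset component on a remote image.
  if (this_image() == 1) print *, co[num_images()]%stuff
end program sketch_component_offset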