Skip to content

Commit 9c374e1

Browse files
authored
Merge pull request #7740 from abouteiller/export/ulfm-to-ompi5
Export/ulfm to ompi5
2 parents 04b9a4a + 6a406fb commit 9c374e1

File tree

201 files changed

+11845
-321
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

201 files changed

+11845
-321
lines changed

README.FT.ULFM.md

Lines changed: 483 additions & 0 deletions
Large diffs are not rendered by default.

config/ompi_setup_prrte.m4

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# Copyright (c) 2004-2021 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -139,6 +139,9 @@ AC_DEFUN([OMPI_SETUP_PRRTE_INTERNAL], [
139139

140140
# add the extra libs
141141
internal_prrte_args="$internal_prrte_args --with-prte-extra-lib=\"$internal_prrte_libs\" --with-prte-extra-ltlib=\"$internal_prrte_libs\""
142+
AS_IF([test "$with_ft" != "no"],
143+
[internal_prrte_args="--enable-prte-ft $internal_prrte_args"],
144+
[])
142145

143146
# Pass all our compiler/linker flags to PRRTE, so that it
144147
# picks up how to build an internal HWLOC, libevent, and PMIx, plus

config/opal_setup_ft.m4

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
dnl
2+
dnl Copyright (c) 2004-2020 The University of Tennessee and The University
3+
dnl of Tennessee Research Foundation. All rights
4+
dnl reserved.
5+
dnl Copyright (c) 2009-2012 Oak Ridge National Labs. All rights reserved.
26
dnl Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
37
dnl Copyright (c) 2015 Research Organization for Information Science
48
dnl and Technology (RIST). All rights reserved.
@@ -12,11 +16,15 @@ dnl
1216
#
1317
# --with-ft=TYPE
1418
# TYPE:
19+
# - mpi (synonym for 'ulfm')
1520
# - LAM (synonym for 'cr' currently)
1621
# - cr
1722
# /* General FT sections */
1823
# #if OPAL_ENABLE_FT == 0 /* FT Disabled globally */
1924
# #if OPAL_ENABLE_FT == 1 /* FT Enabled globally */
25+
# /* ULFM Specific sections */
26+
# #if OPAL_ENABLE_FT_MPI == 0 /* FT ULFM Disabled */
27+
# #if OPAL_ENABLE_FT_MPI == 1 /* FT ULFM Enabled */
2028
# /* CR Specific sections */
2129
# #if OPAL_ENABLE_FT_CR == 0 /* FT Ckpt/Restart Disabled */
2230
# #if OPAL_ENABLE_FT_CR == 1 /* FT Ckpt/Restart Enabled */
@@ -28,14 +36,13 @@ AC_DEFUN([OPAL_SETUP_FT_BANNER],[
2836
])
2937

3038
AC_DEFUN([OPAL_SETUP_FT_OPTIONS],[
31-
AC_REQUIRE([OPAL_SETUP_FT_BANNER])
3239
# define a variable that tells us that these options were enabled
3340
opal_setup_ft_options="yes"
3441
AC_ARG_WITH(ft,
3542
[AC_HELP_STRING([--with-ft=TYPE],
36-
[Specify the type of fault tolerance to enable. Options: LAM (LAM/MPI-like), cr (Checkpoint/Restart), (default: disabled)])],
37-
[opal_want_ft=1],
38-
[opal_want_ft=0])
43+
[Specify the type of fault tolerance to enable. Options: mpi (ULFM), LAM (LAM/MPI-like), cr (Checkpoint/Restart) (default: mpi)])],
44+
[],
45+
[with_ft=auto]) # If not specified, act as if --with-ft=mpi, but treat missing resilience support in an external PRTE as a fatal error only when --with-ft was explicitly requested
3946
4047
#
4148
# Checkpoint/restart enabled debugging
@@ -60,12 +67,15 @@ AC_DEFUN([OPAL_SETUP_FT_OPTIONS],[
6067
])
6168

6269
AC_DEFUN([OPAL_SETUP_FT],[
70+
AC_REQUIRE([OPAL_SETUP_FT_BANNER])
6371
if test "$opal_setup_ft_options" = "yes"; then
6472
AC_MSG_CHECKING([if want fault tolerance])
6573
fi
66-
if test "x$with_ft" != "x" || test "$opal_want_ft" = "1"; then
74+
75+
if test x"$with_ft" != "xno"; then
6776
opal_want_ft=1
6877
opal_want_ft_cr=0
78+
opal_want_ft_mpi=0
6979
opal_want_ft_type=none
7080
7181
as_save_IFS=$IFS
@@ -74,8 +84,18 @@ AC_DEFUN([OPAL_SETUP_FT],[
7484
IFS=$as_save_IFS
7585
7686
# Default value
77-
if test "$opt" = "" || test "$opt" = "yes"; then
78-
opal_want_ft_cr=1
87+
if test "$opt" = "auto"; then
88+
opal_want_ft_mpi=1
89+
elif test "$opt" = "yes"; then
90+
opal_want_ft_mpi=1
91+
elif test "$opt" = "ULFM"; then
92+
opal_want_ft_mpi=1
93+
elif test "$opt" = "ulfm"; then
94+
opal_want_ft_mpi=1
95+
elif test "$opt" = "MPI"; then
96+
opal_want_ft_mpi=1
97+
elif test "$opt" = "mpi"; then
98+
opal_want_ft_mpi=1
7999
elif test "$opt" = "LAM"; then
80100
opal_want_ft_cr=1
81101
elif test "$opt" = "lam"; then
@@ -89,28 +109,57 @@ AC_DEFUN([OPAL_SETUP_FT],[
89109
AC_MSG_ERROR([Cannot continue])
90110
fi
91111
done
92-
if test "$opal_want_ft_cr" = 1; then
112+
if test "$opal_want_ft_mpi" = 1; then
113+
opal_want_ft_type="mpi"
114+
elif test "$opal_want_ft_cr" = 1; then
93115
opal_want_ft_type="cr"
94116
fi
95117
118+
# If we use external PRTE, does it support FT?
119+
AS_IF([test "$internal_prrte_build" = "0" -a "$opal_want_ft_type" != "none"], [
120+
AS_IF([prte_info | $GREP "Resilience support: yes"], [], [
121+
AS_IF([test "$with_ft" != auto], [
122+
AC_MSG_ERROR([Requested enabling fault-tolerance and using external launcher, but external PRTE doesn't support resilience; you can either use the internal PRTE, recompile the external PRTE with fault-tolerance, or disable fault-tolerance. ABORTING.])
123+
], [
124+
AC_MSG_WARN([**************************************************])
125+
AC_MSG_WARN([*** Requested external PRTE which doesn't have *])
126+
AC_MSG_WARN([*** Resilience compiled-in. *])
127+
AC_MSG_WARN([*** To enable Open MPI Fault-Tolerance, either *])
128+
AC_MSG_WARN([*** use the internal PRTE, or *])
129+
AC_MSG_WARN([*** compile the external PRTE with resilience *])
130+
AC_MSG_WARN([*** DISABLING FAULT TOLERANCE SUPPORT. *])
131+
AC_MSG_WARN([**************************************************])
132+
opal_want_ft_mpi=0
133+
opal_want_ft_cr=0
134+
opal_want_ft_type="none"
135+
])
136+
])
137+
])
96138
AC_MSG_RESULT([Enabled $opal_want_ft_type (Specified $with_ft)])
97-
AC_MSG_WARN([**************************************************])
98-
AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *])
99-
AC_MSG_WARN([*** a research quality implementation, and care *])
100-
AC_MSG_WARN([*** should be used when choosing to enable it. *])
101-
AC_MSG_WARN([**************************************************])
139+
AS_IF([test "$opal_want_ft_type" != "none"], [
140+
AC_MSG_WARN([**************************************************])
141+
AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *])
142+
AC_MSG_WARN([*** compiled-in, but off by default. Use mpiexec *])
143+
AC_MSG_WARN([*** and MCA parameters to turn it on. *])
144+
AC_MSG_WARN([*** Not all components support fault tolerance. *])
145+
AC_MSG_WARN([**************************************************])
146+
])
102147
else
103148
opal_want_ft=0
149+
opal_want_ft_mpi=0
104150
opal_want_ft_cr=0
105151
if test "$opal_setup_ft_options" = "yes"; then
106152
AC_MSG_RESULT([Disabled fault tolerance])
107153
fi
108154
fi
109155
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT], [$opal_want_ft],
110156
[Enable fault tolerance general components and logic])
157+
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_MPI], [$opal_want_ft_mpi],
158+
[Enable fault tolerance MPI ULFM components and logic])
111159
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_CR], [$opal_want_ft_cr],
112160
[Enable fault tolerance checkpoint/restart components and logic])
113161
AM_CONDITIONAL(WANT_FT, test "$opal_want_ft" = "1")
162+
AM_CONDITIONAL(WANT_FT_MPI, test "$opal_want_ft_mpi" = "1")
114163
AM_CONDITIONAL(WANT_FT_CR, test "$opal_want_ft_cr" = "1")
115164
116165
if test "$opal_setup_ft_options" = "yes"; then
@@ -175,4 +224,5 @@ AC_DEFUN([OPAL_SETUP_FT],[
175224
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_THREAD], [$opal_want_ft_thread],
176225
[Enable fault tolerance thread in Open PAL])
177226
AM_CONDITIONAL(WANT_FT_THREAD, test "$opal_want_ft_thread" = "1")
227+
OPAL_SUMMARY_ADD([[Miscellaneous]],[[Fault Tolerance support]],[unnecessary], [$opal_want_ft_type])
178228
])

configure.ac

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,29 +1116,11 @@ AC_INCLUDES_DEFAULT
11161116
# checkpoint results
11171117
AC_CACHE_SAVE
11181118

1119-
###########################################################
1119+
##################################
11201120
# Fault Tolerance
1121-
#
1122-
# The FT code in the OMPI trunk is currently broken. We don't
1123-
# have an active maintainer for it at this time, and it isn't
1124-
# clear if/when we will return to it. We have therefore removed
1125-
# the configure options supporting it until such time as it
1126-
# can be fixed.
1127-
#
1128-
# However, we recognize that there are researchers who use this
1129-
# option on their independent branches. In such cases, simply
1130-
# uncomment the line below to render the FT configure options
1131-
# visible again
1132-
#
1133-
###########################################################
1134-
dnl OPAL_SETUP_FT_OPTIONS
1135-
###########################################################
1136-
# The following line is always required as it contains the
1137-
# AC_DEFINE and AM_CONDITIONAL calls that set variables used
1138-
# throughout the build system. If the above line is commented
1139-
# out, then those variables will be set to "off". Otherwise,
1140-
# they are controlled by the options
1141-
OPAL_SETUP_FT
1121+
# Part1: must happen before prte
1122+
##################################
1123+
OPAL_SETUP_FT_OPTIONS
11421124

11431125
##################################
11441126
# 3rd-party packages not called ROMIO
@@ -1160,6 +1142,12 @@ AC_SUBST(OPAL_3RDPARTY_DIST_SUBDIRS)
11601142
AC_SUBST(OPAL_3RDPARTY_EXTRA_DIST)
11611143
AC_SUBST(OPAL_3RDPARTY_DISTCLEAN_DIRS)
11621144

1145+
##################################
1146+
# Fault Tolerance
1147+
# Part2: must happen after prte
1148+
##################################
1149+
OPAL_SETUP_FT
1150+
11631151
##################################
11641152
# MCA
11651153
##################################

contrib/Makefile.am

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
amca_paramdir = $(AMCA_PARAM_SETS_DIR)
2828
dist_amca_param_DATA = amca-param-sets/example.conf
2929

30+
if WANT_FT_MPI
31+
dist_amca_param_DATA += amca-param-sets/ft-mpi
32+
endif # WANT_FT_MPI
33+
3034
if WANT_FT_CR
3135
dist_amca_param_DATA += \
3236
amca-param-sets/ft-enable-cr \

contrib/amca-param-sets/ft-mpi

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#
2+
# Copyright (c) 2020 The University of Tennessee and The University
3+
# of Tennessee Research Foundation. All rights
4+
# reserved.
5+
# $COPYRIGHT$
6+
#
7+
# Additional copyrights may follow
8+
#
9+
# $HEADER$
10+
#
11+
# An Aggregate MCA Parameter Set to setup an environment that can support
12+
# User-Level Failure Mitigation (ULFM) fault tolerance (must also be
13+
# compiled in with --with-ft=mpi).
14+
#
15+
# Usage:
16+
# shell$ mpirun --tune ft-mpi ./app
17+
#
18+
19+
mpi_ft_enable=true
20+
21+
# Since failures are expected, reduce the verbosity of the transport errors
22+
btl_base_warn_peer_error=false
23+
24+
#
25+
# Performance tuning parameters (default shown)
26+
# By default the PRTE failure detector is used (see README.FT.ULFM.md)
27+
#mpi_ft_detector=false
28+
#mpi_ft_detector_thread=false
29+
#mpi_ft_detector_rdma_heartbeat=false
30+
#mpi_ft_detector_period=3.
31+
#mpi_ft_detector_timeout=10.
32+
#
33+
34+
35+
#
36+
# Select only ULFM ready components
37+
# disabling non-tested and known broken components in FT-MPI builds
38+
#
39+
40+
#
41+
# The following frameworks/components are TESTED
42+
# They handle faults and should be preferred when running with FT.
43+
# pml ob1
44+
# btl tcp, self, sm(+xpmem,+cma), ugni, uct
45+
# coll base/basic, tuned, ftagree, libnbc
46+
pml=ob1
47+
threads=pthreads
48+
49+
#
50+
# The following frameworks/components are UNTESTED, but **may** work.
51+
# They should run without faults, and **may** work with faults.
52+
# You may try and report if successful.
53+
# btl ofi, portals4, smcuda, usnic, sm(+knem)
54+
# coll inter, sm, sync, cuda, monitoring
55+
# pml monitoring, v/vprotocol
56+
# We will disable only the components for which good components are known to exist.
57+
btl=^usnic
58+
# older versions of xpmem generate bus errors when the other end is dead.
59+
#btl_sm_single_copy_mechanism=cma
60+
61+
62+
#
63+
# The following frameworks/components are UNTESTED, and probably won't work.
64+
# They should run without faults, and will probably crash/deadlock after a fault.
65+
# You may try at your own risk.
66+
# coll hcoll, portals4
67+
# topo (all)
68+
# osc (all)
69+
# io (all)
70+
# fcoll (all)
71+
# fbtl (all)
72+
# We will disable only the components for which good components are known to exist.
73+
# Other untested components are selectable but will issue a runtime warning at
74+
# initiation if FT is enabled.
75+
coll=^hcoll,portals4
76+
77+
#
78+
# The following frameworks/components are NOT WORKING. Do not enable these with FT.
79+
# mtl (all)
80+
# pml cm, crcpw, ucx
81+
mtl=^ofi,portals4,psm2
82+
# already enforced by pml=ob1 above
83+
#pml=^cm,crcpw,ucx
84+
# already enforced by threads=pthreads above
85+
#threads=^argobots,qthreads
86+
# There is a bug in libevent with the "select" backend that causes an infinite loop
87+
# when an unplanned disconnect happens. Use something else, or bail.
88+
opal_event_include=epoll,devpoll,kqueue,evport,poll
89+

ompi/attribute/attribute_predefined.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2021 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -52,6 +52,9 @@
5252
*
5353
* MPI_WTIME_IS_GLOBAL is set to 0 (a conservative answer).
5454
*
55+
* MPI_FT is set to 0 or 1 (according to OPAL_ENABLE_FT_MPI and
56+
* ompi_ftmpi_enabled)
57+
*
5558
* MPI_APPNUM is set as the result of a GPR subscription.
5659
*
5760
* MPI_LASTUSEDCODE is set to an initial value and is reset every time
@@ -123,7 +126,9 @@ int ompi_attr_create_predefined(void)
123126
OMPI_SUCCESS != (ret = create_win(MPI_WIN_SIZE)) ||
124127
OMPI_SUCCESS != (ret = create_win(MPI_WIN_DISP_UNIT)) ||
125128
OMPI_SUCCESS != (ret = create_win(MPI_WIN_CREATE_FLAVOR)) ||
126-
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL))) {
129+
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL)) ||
130+
OMPI_SUCCESS != (ret = create_comm(MPI_FT, false)) || /* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
131+
0) {
127132
return ret;
128133
}
129134

@@ -133,6 +138,14 @@ int ompi_attr_create_predefined(void)
133138
OMPI_SUCCESS != (ret = set_f(MPI_HOST, MPI_PROC_NULL)) ||
134139
OMPI_SUCCESS != (ret = set_f(MPI_IO, MPI_ANY_SOURCE)) ||
135140
OMPI_SUCCESS != (ret = set_f(MPI_WTIME_IS_GLOBAL, 0)) ||
141+
#if OPAL_ENABLE_FT_MPI
142+
/* Although we always define the key to ease fortran integration,
143+
* the attribute only reflects ompi_ftmpi_enabled when fault tolerance
144+
* is built in; otherwise it is set to false. */
145+
OMPI_SUCCESS != (ret = set_f(MPI_FT, ompi_ftmpi_enabled)) ||
146+
#else
147+
OMPI_SUCCESS != (ret = set_f(MPI_FT, false)) ||
148+
#endif /* OPAL_ENABLE_FT_MPI */
136149
OMPI_SUCCESS != (ret = set_f(MPI_LASTUSEDCODE,
137150
ompi_mpi_errcode_lastused))) {
138151
return ret;
@@ -161,6 +174,7 @@ int ompi_attr_free_predefined(void)
161174
OMPI_SUCCESS != (ret = free_comm(MPI_APPNUM)) ||
162175
OMPI_SUCCESS != (ret = free_comm(MPI_LASTUSEDCODE)) ||
163176
OMPI_SUCCESS != (ret = free_comm(MPI_UNIVERSE_SIZE)) ||
177+
OMPI_SUCCESS != (ret = free_comm(MPI_FT)) || /* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
164178
OMPI_SUCCESS != (ret = free_win(MPI_WIN_BASE)) ||
165179
OMPI_SUCCESS != (ret = free_win(MPI_WIN_SIZE)) ||
166180
OMPI_SUCCESS != (ret = free_win(MPI_WIN_DISP_UNIT)) ||

ompi/communicator/Makefile.am

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# Copyright (c) 2004-2020 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -34,3 +34,8 @@ lib@OMPI_LIBMPI_NAME@_la_SOURCES += \
3434
communicator/comm_cid.c \
3535
communicator/comm_request.c
3636

37+
if WANT_FT_MPI
38+
lib@OMPI_LIBMPI_NAME@_la_SOURCES += \
39+
communicator/ft/comm_ft.c communicator/ft/comm_ft_reliable_bcast.c communicator/ft/comm_ft_propagator.c communicator/ft/comm_ft_detector.c communicator/ft/comm_ft_revoke.c
40+
endif # WANT_FT_MPI
41+

0 commit comments

Comments
 (0)