Skip to content

Commit 77b2dc7

Browse files
committed
Import ULFM Fault Tolerance
The historical repositories contain the full history and attribution and are available from https://bitbucket.org/icldistcomp/ulfm2/src/ulfm/ and prior https://github.com/ICLDisco/ulfm-legacy Signed-off-by: Aurelien Bouteiller <[email protected]> Signed-off-by: George Bosilca <[email protected]> Signed-off-by: Josh Hursey <[email protected]> Signed-off-by: Thomas Herault <[email protected]> Signed-off-by: Wesley Bland <[email protected]> Signed-off-by: Nuria Losada <[email protected]> Signed-off-by: Nathan T. Weeks <[email protected]> Squashed commit of the following: commit 878bf118703eadec73f3fee09342c6c33c19c00b Merge: 4c24553d a7e91d8 Author: Aurelien Bouteiller <[email protected]> Date: Tue Jan 5 10:53:44 2021 -0500 Merge branch 'master' into export/ulfm-to-ompi5-expanded commit 4c24553daf7db2028f87a01b0769226abb417992 Author: Aurelien Bouteiller <[email protected]> Date: Tue Jan 5 10:53:26 2021 -0500 Revert "TEMPORARY: Use the corrected prrte by default to avoid confusion" This reverts commit e5769efcf00ac0faa7f556ed57bcd7b4064fe262. ... commit 69ab6b8 Author: Aurélien Bouteiller <[email protected]> Date: Thu Feb 18 20:03:12 2016 -0500 Importing ULFM ompi layer: snapshot of WIP Missing BTL and COLL imports. Almost compiles w/o --with-ft
1 parent a7e91d8 commit 77b2dc7

File tree

199 files changed

+11780
-293
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

199 files changed

+11780
-293
lines changed

README.FT.ULFM.md

Lines changed: 483 additions & 0 deletions
Large diffs are not rendered by default.

config/ompi_setup_prrte.m4

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# Copyright (c) 2004-2020 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -45,6 +45,12 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
4545

4646
AM_CONDITIONAL([OMPI_WANT_PRRTE], [test "$internal_prrte_build" = "1"])
4747

48+
AS_IF([test "$internal_prrte_build" = "0" -a "$opal_want_ft_type" = "mpi"], [
49+
AS_IF([prte_info | $GREP "Resilience support: yes"], [], [
50+
AC_MSG_ERROR([Requested enabling fault-tolerance and using external launcher, but external PRTE doesn't support resilience; you can either use the internal PRTE, recompile the external PRTE with fault-tolerance, or disable fault-tolerance. Aborting.])
51+
])
52+
])
53+
4854
OPAL_VAR_SCOPE_POP
4955
])
5056
@@ -139,6 +145,9 @@ AC_DEFUN([OMPI_SETUP_PRRTE_INTERNAL], [
139145
140146
# add the extra libs
141147
internal_prrte_args="$internal_prrte_args --with-prte-extra-lib=\"$internal_prrte_libs\" --with-prte-extra-ltlib=\"$internal_prrte_libs\""
148+
AS_IF([test "$opal_want_ft_type" = "mpi"],
149+
[internal_prrte_args="--enable-prte-ft $internal_prrte_args"],
150+
[])
142151
143152
# Pass all our compiler/linker flags to PRRTE, so that it
144153
# picks up how to build an internal HWLOC, libevent, and PMIx, plus

config/opal_setup_ft.m4

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
dnl
2+
dnl Copyright (c) 2004-2020 The University of Tennessee and The University
3+
dnl of Tennessee Research Foundation. All rights
4+
dnl reserved.
5+
dnl Copyright (c) 2009-2012 Oak Ridge National Labs. All rights reserved.
26
dnl Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
37
dnl Copyright (c) 2015 Research Organization for Information Science
48
dnl and Technology (RIST). All rights reserved.
@@ -12,11 +16,15 @@ dnl
1216
#
1317
# --with-ft=TYPE
1418
# TYPE:
19+
# - mpi (synonym for 'ulfm')
1520
# - LAM (synonym for 'cr' currently)
1621
# - cr
1722
# /* General FT sections */
1823
# #if OPAL_ENABLE_FT == 0 /* FT Disabled globally */
1924
# #if OPAL_ENABLE_FT == 1 /* FT Enabled globally */
25+
# /* ULFM Specific sections */
26+
# #if OPAL_ENABLE_FT_MPI == 0 /* FT ULFM Disabled */
27+
# #if OPAL_ENABLE_FT_MPI == 1 /* FT ULFM Enabled */
2028
# /* CR Specific sections */
2129
# #if OPAL_ENABLE_FT_CR == 0 /* FT Ckpt/Restart Disabled */
2230
# #if OPAL_ENABLE_FT_CR == 1 /* FT Ckpt/Restart Enabled */
@@ -33,9 +41,9 @@ AC_DEFUN([OPAL_SETUP_FT_OPTIONS],[
3341
opal_setup_ft_options="yes"
3442
AC_ARG_WITH(ft,
3543
[AC_HELP_STRING([--with-ft=TYPE],
36-
[Specify the type of fault tolerance to enable. Options: LAM (LAM/MPI-like), cr (Checkpoint/Restart), (default: disabled)])],
37-
[opal_want_ft=1],
38-
[opal_want_ft=0])
44+
[Specify the type of fault tolerance to enable. Options: mpi (ULFM), LAM (LAM/MPI-like), cr (Checkpoint/Restart) (default: mpi)])],
45+
[],
46+
[with_ft=mpi]) # If not specified act as if --with-ft=mpi
3947
4048
#
4149
# Checkpoint/restart enabled debugging
@@ -63,9 +71,11 @@ AC_DEFUN([OPAL_SETUP_FT],[
6371
if test "$opal_setup_ft_options" = "yes"; then
6472
AC_MSG_CHECKING([if want fault tolerance])
6573
fi
66-
if test "x$with_ft" != "x" || test "$opal_want_ft" = "1"; then
74+
75+
if test x"$with_ft" != "xno"; then
6776
opal_want_ft=1
6877
opal_want_ft_cr=0
78+
opal_want_ft_mpi=0
6979
opal_want_ft_type=none
7080
7181
as_save_IFS=$IFS
@@ -74,8 +84,16 @@ AC_DEFUN([OPAL_SETUP_FT],[
7484
IFS=$as_save_IFS
7585
7686
# Default value
77-
if test "$opt" = "" || test "$opt" = "yes"; then
78-
opal_want_ft_cr=1
87+
if test "$opt" = "yes"; then
88+
opal_want_ft_mpi=1
89+
elif test "$opt" = "ULFM"; then
90+
opal_want_ft_mpi=1
91+
elif test "$opt" = "ulfm"; then
92+
opal_want_ft_mpi=1
93+
elif test "$opt" = "MPI"; then
94+
opal_want_ft_mpi=1
95+
elif test "$opt" = "mpi"; then
96+
opal_want_ft_mpi=1
7997
elif test "$opt" = "LAM"; then
8098
opal_want_ft_cr=1
8199
elif test "$opt" = "lam"; then
@@ -89,28 +107,35 @@ AC_DEFUN([OPAL_SETUP_FT],[
89107
AC_MSG_ERROR([Cannot continue])
90108
fi
91109
done
92-
if test "$opal_want_ft_cr" = 1; then
110+
if test "$opal_want_ft_mpi" = 1; then
111+
opal_want_ft_type="mpi"
112+
elif test "$opal_want_ft_cr" = 1; then
93113
opal_want_ft_type="cr"
94114
fi
95115
96116
AC_MSG_RESULT([Enabled $opal_want_ft_type (Specified $with_ft)])
97117
AC_MSG_WARN([**************************************************])
98118
AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *])
99-
AC_MSG_WARN([*** a research quality implementation, and care *])
100-
AC_MSG_WARN([*** should be used when choosing to enable it. *])
119+
AC_MSG_WARN([*** compiled-in, but off by default. Use mpiexec *])
120+
AC_MSG_WARN([*** and MCA parameters to turn it on. *])
121+
AC_MSG_WARN([*** Not all components support fault tolerance. *])
101122
AC_MSG_WARN([**************************************************])
102123
else
103124
opal_want_ft=0
125+
opal_want_ft_mpi=0
104126
opal_want_ft_cr=0
105127
if test "$opal_setup_ft_options" = "yes"; then
106128
AC_MSG_RESULT([Disabled fault tolerance])
107129
fi
108130
fi
109131
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT], [$opal_want_ft],
110132
[Enable fault tolerance general components and logic])
133+
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_MPI], [$opal_want_ft_mpi],
134+
[Enable fault tolerance MPI ULFM components and logic])
111135
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_CR], [$opal_want_ft_cr],
112136
[Enable fault tolerance checkpoint/restart components and logic])
113137
AM_CONDITIONAL(WANT_FT, test "$opal_want_ft" = "1")
138+
AM_CONDITIONAL(WANT_FT_MPI, test "$opal_want_ft_mpi" = "1")
114139
AM_CONDITIONAL(WANT_FT_CR, test "$opal_want_ft_cr" = "1")
115140
116141
if test "$opal_setup_ft_options" = "yes"; then
@@ -175,4 +200,5 @@ AC_DEFUN([OPAL_SETUP_FT],[
175200
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_THREAD], [$opal_want_ft_thread],
176201
[Enable fault tolerance thread in Open PAL])
177202
AM_CONDITIONAL(WANT_FT_THREAD, test "$opal_want_ft_thread" = "1")
203+
OPAL_SUMMARY_ADD([[Miscellaneous]],[[Fault Tolerance support]],[unnecessary], [$with_ft])
178204
])

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1128,7 +1128,7 @@ AC_CACHE_SAVE
11281128
# visible again
11291129
#
11301130
###########################################################
1131-
dnl OPAL_SETUP_FT_OPTIONS
1131+
OPAL_SETUP_FT_OPTIONS
11321132
###########################################################
11331133
# The following line is always required as it contains the
11341134
# AC_DEFINE and AM_CONDITIONAL calls that set variables used

contrib/Makefile.am

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
amca_paramdir = $(AMCA_PARAM_SETS_DIR)
2828
dist_amca_param_DATA = amca-param-sets/example.conf
2929

30+
if WANT_FT_MPI
31+
dist_amca_param_DATA += amca-param-sets/ft-mpi
32+
endif # WANT_FT_MPI
33+
3034
if WANT_FT_CR
3135
dist_amca_param_DATA += \
3236
amca-param-sets/ft-enable-cr \

contrib/amca-param-sets/ft-mpi

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#
2+
# Copyright (c) 2020 The University of Tennessee and The University
3+
# of Tennessee Research Foundation. All rights
4+
# reserved.
5+
# $COPYRIGHT$
6+
#
7+
# Additional copyrights may follow
8+
#
9+
# $HEADER$
10+
#
11+
# An Aggregate MCA Parameter Set to setup an environment that can support
12+
# User-Level Failure Mitigation (ULFM) fault tolerance (must also be
13+
# compiled in with --with-ft=mpi).
14+
#
15+
# Usage:
16+
# shell$ mpirun --tune ft-mpi ./app
17+
#
18+
19+
mpi_ft_enable=true
20+
21+
# Since failures are expected, reduce the verbosity of the transport errors
22+
btl_base_warn_peer_error=false
23+
24+
#
25+
# Performance tuning parameters (default shown)
26+
# By default the PRTE failure detector is used (see README.FT.ULFM.md)
27+
#mpi_ft_detector=false
28+
#mpi_ft_detector_thread=false
29+
#mpi_ft_detector_rdma_heartbeat=false
30+
#mpi_ft_detector_period=3.
31+
#mpi_ft_detector_timeout=10.
32+
#
33+
34+
35+
#
36+
# Select only ULFM ready components
37+
# disabling non-tested and known broken components in FT-MPI builds
38+
#
39+
40+
#
41+
# The following frameworks/components are TESTED
42+
# They handle faults and should be preferred when running with FT.
43+
# pml ob1
44+
# btl tcp, self, sm(+xpmem,+cma), ugni, uct
45+
# coll base/basic, tuned, ftagree, libnbc
46+
pml=ob1
47+
threads=pthreads
48+
49+
#
50+
# The following frameworks/components are UNTESTED, but **may** work.
51+
# They should run without faults, and **may** work with faults.
52+
# You may try and report if successful.
53+
# btl ofi, portals4, smcuda, usnic, sm(+knem)
54+
# coll inter, sm, sync, cuda, monitoring
55+
# pml monitoring, v/vprotocol
56+
# We will disable only the components for which good components are known to exist.
57+
btl=^usnic
58+
# older versions of xpmem generate bus errors when the other end is dead.
59+
#btl_sm_single_copy_mechanism=cma
60+
61+
62+
#
63+
# The following frameworks/components are UNTESTED, and probably won't work.
64+
# They should run without faults, and will probably crash/deadlock after a fault.
65+
# You may try at your own risk.
66+
# coll hcoll, portals4
67+
# topo (all)
68+
# osc (all)
69+
# io (all)
70+
# fcoll (all)
71+
# fbtl (all)
72+
# We will disable only the components for which good components are known to exist.
73+
# Other untested components are selectable but will issue a runtime warning at
74+
# initiation if FT is enabled.
75+
coll=^hcoll,portals4
76+
77+
#
78+
# The following frameworks/components are NOT WORKING. Do not enable these with FT.
79+
# mtl (all)
80+
# pml cm, crcpw, ucx
81+
mtl=^ofi,portals4,psm2
82+
# already enforced by pml=ob1 above
83+
#pml=^cm,crcpw,ucx
84+
# already enforced by threads=pthreads above
85+
#threads=^argobots,qthreads
86+
# There is a bug in libevent with the "select" backend that causes an infinite loop
87+
# when an unplanned disconnect happens. Use something else, or bail.
88+
opal_event_include=epoll,devpoll,kqueue,evport,poll
89+

ompi/attribute/attribute_predefined.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2020 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -52,6 +52,9 @@
5252
*
5353
* MPI_WTIME_IS_GLOBAL is set to 0 (a conservative answer).
5454
*
55+
* MPI_FT is set to 0 or 1 (according to OPAL_ENABLE_FT_MPI and
56+
* ompi_ftmpi_enabled)
57+
*
5558
* MPI_APPNUM is set as the result of a GPR subscription.
5659
*
5760
* MPI_LASTUSEDCODE is set to an initial value and is reset every time
@@ -123,7 +126,11 @@ int ompi_attr_create_predefined(void)
123126
OMPI_SUCCESS != (ret = create_win(MPI_WIN_SIZE)) ||
124127
OMPI_SUCCESS != (ret = create_win(MPI_WIN_DISP_UNIT)) ||
125128
OMPI_SUCCESS != (ret = create_win(MPI_WIN_CREATE_FLAVOR)) ||
126-
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL))) {
129+
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL)) ||
130+
#if 1 /* not conditional on OPAL_ENABLE_FT_MPI for ABI */
131+
OMPI_SUCCESS != (ret = create_comm(MPI_FT, true)) ||
132+
#endif /* OPAL_ENABLE_FT_MPI */
133+
0) {
127134
return ret;
128135
}
129136

@@ -133,6 +140,14 @@ int ompi_attr_create_predefined(void)
133140
OMPI_SUCCESS != (ret = set_f(MPI_HOST, MPI_PROC_NULL)) ||
134141
OMPI_SUCCESS != (ret = set_f(MPI_IO, MPI_ANY_SOURCE)) ||
135142
OMPI_SUCCESS != (ret = set_f(MPI_WTIME_IS_GLOBAL, 0)) ||
143+
#if OPAL_ENABLE_FT_MPI
144+
/* The key is always defined to ease Fortran integration. With fault
145+
* tolerance built in, the attribute reflects ompi_ftmpi_enabled;
146+
* without it, the attribute is always set to false. */
147+
OMPI_SUCCESS != (ret = set_f(MPI_FT, ompi_ftmpi_enabled)) ||
148+
#else
149+
OMPI_SUCCESS != (ret = set_f(MPI_FT, false)) ||
150+
#endif /* OPAL_ENABLE_FT_MPI */
136151
OMPI_SUCCESS != (ret = set_f(MPI_LASTUSEDCODE,
137152
ompi_mpi_errcode_lastused))) {
138153
return ret;
@@ -161,6 +176,9 @@ int ompi_attr_free_predefined(void)
161176
OMPI_SUCCESS != (ret = free_comm(MPI_APPNUM)) ||
162177
OMPI_SUCCESS != (ret = free_comm(MPI_LASTUSEDCODE)) ||
163178
OMPI_SUCCESS != (ret = free_comm(MPI_UNIVERSE_SIZE)) ||
179+
#if 1 /* not conditional on OPAL_ENABLE_FT_MPI for ABI */
180+
OMPI_SUCCESS != (ret = free_comm(MPI_FT)) ||
181+
#endif /* OPAL_ENABLE_FT_MPI */
164182
OMPI_SUCCESS != (ret = free_win(MPI_WIN_BASE)) ||
165183
OMPI_SUCCESS != (ret = free_win(MPI_WIN_SIZE)) ||
166184
OMPI_SUCCESS != (ret = free_win(MPI_WIN_DISP_UNIT)) ||

ompi/communicator/Makefile.am

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# Copyright (c) 2004-2020 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -34,3 +34,8 @@ lib@OMPI_LIBMPI_NAME@_la_SOURCES += \
3434
communicator/comm_cid.c \
3535
communicator/comm_request.c
3636

37+
if WANT_FT_MPI
38+
lib@OMPI_LIBMPI_NAME@_la_SOURCES += \
39+
communicator/ft/comm_ft.c communicator/ft/comm_ft_reliable_bcast.c communicator/ft/comm_ft_propagator.c communicator/ft/comm_ft_detector.c communicator/ft/comm_ft_revoke.c
40+
endif # WANT_FT_MPI
41+

0 commit comments

Comments
 (0)