Skip to content

Commit bd8c781

Browse files
committed
Import ULFM Fault Tolerance
The historical repositories contain the full history and attribution and are available from https://bitbucket.org/icldistcomp/ulfm2/src/ulfm/ and prior https://github.com/ICLDisco/ulfm-legacy Signed-off-by: Aurelien Bouteiller <[email protected]> Signed-off-by: George Bosilca <[email protected]> Signed-off-by: Josh Hursey <[email protected]> Signed-off-by: Thomas Herault <[email protected]> Signed-off-by: Wesley Bland <[email protected]> Signed-off-by: Nuria Losada <[email protected]> Signed-off-by: Nathan T. Weeks <[email protected]> Squashed commit of the following: commit fc929da06f68ec80d204e9792ba6ac1e7e01d365 Author: Aurelien Bouteiller <[email protected]> Date: Wed Oct 28 00:17:11 2020 -0400 Do not conditionally compile FT_MPI ABI impacting members of structures Signed-off-by: Aurelien Bouteiller <[email protected]> commit bc0a690f44c0c622fdaecde2ea527a08324a14a8 Merge: ced46891 593a918 Author: Aurelien Bouteiller <[email protected]> Date: Tue Oct 27 14:58:54 2020 -0400 Merge branch 'master' (593a918) into export/ulfm-to-ompi5-expanded ... commit 69ab6b8 Author: Aurélien Bouteiller <[email protected]> Date: Thu Feb 18 20:03:12 2016 -0500 Importing ULFM ompi layer: snapshot of WIP Missing BTL and COLL imports. Almost compiles w/o --with-ft
1 parent 593a918 commit bd8c781

File tree

199 files changed

+11775
-294
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

199 files changed

+11775
-294
lines changed

README.FT.ULFM.md

Lines changed: 483 additions & 0 deletions
Large diffs are not rendered by default.

config/ompi_setup_prrte.m4

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# Copyright (c) 2004-2020 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -139,6 +139,9 @@ AC_DEFUN([OMPI_SETUP_PRRTE_INTERNAL], [
139139

140140
# add the extra libs
141141
internal_prrte_args="$internal_prrte_args --with-prte-extra-lib=\"$internal_prrte_libs\" --with-prte-extra-ltlib=\"$internal_prrte_libs\""
142+
AS_IF([test "$opal_want_ft_type" = "mpi"],
143+
[internal_prrte_args="--enable-prte-ft $internal_prrte_args"],
144+
[])
142145

143146
# Pass all our compiler/linker flags to PRRTE, so that it
144147
# picks up how to build an internal HWLOC, libevent, and PMIx, plus

config/opal_setup_ft.m4

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
dnl
2+
dnl Copyright (c) 2004-2020 The University of Tennessee and The University
3+
dnl of Tennessee Research Foundation. All rights
4+
dnl reserved.
5+
dnl Copyright (c) 2009-2012 Oak Ridge National Labs. All rights reserved.
26
dnl Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
37
dnl Copyright (c) 2015 Research Organization for Information Science
48
dnl and Technology (RIST). All rights reserved.
@@ -12,11 +16,15 @@ dnl
1216
#
1317
# --with-ft=TYPE
1418
# TYPE:
19+
# - mpi (synonym for 'ulfm')
1520
# - LAM (synonym for 'cr' currently)
1621
# - cr
1722
# /* General FT sections */
1823
# #if OPAL_ENABLE_FT == 0 /* FT Disabled globally */
1924
# #if OPAL_ENABLE_FT == 1 /* FT Enabled globally */
25+
# /* ULFM Specific sections */
26+
# #if OPAL_ENABLE_FT_MPI == 0 /* FT ULFM Disabled */
27+
# #if OPAL_ENABLE_FT_MPI == 1 /* FT ULFM Enabled */
2028
# /* CR Specific sections */
2129
# #if OPAL_ENABLE_FT_CR == 0 /* FT Ckpt/Restart Disabled */
2230
# #if OPAL_ENABLE_FT_CR == 1 /* FT Ckpt/Restart Enabled */
@@ -33,9 +41,9 @@ AC_DEFUN([OPAL_SETUP_FT_OPTIONS],[
3341
opal_setup_ft_options="yes"
3442
AC_ARG_WITH(ft,
3543
[AC_HELP_STRING([--with-ft=TYPE],
36-
[Specify the type of fault tolerance to enable. Options: LAM (LAM/MPI-like), cr (Checkpoint/Restart), (default: disabled)])],
37-
[opal_want_ft=1],
38-
[opal_want_ft=0])
44+
[Specify the type of fault tolerance to enable. Options: mpi (ULFM), LAM (LAM/MPI-like), cr (Checkpoint/Restart) (default: mpi)])],
45+
[],
46+
[with_ft=mpi]) # If not specified act as if --with-ft=mpi
3947
4048
#
4149
# Checkpoint/restart enabled debugging
@@ -63,9 +71,11 @@ AC_DEFUN([OPAL_SETUP_FT],[
6371
if test "$opal_setup_ft_options" = "yes"; then
6472
AC_MSG_CHECKING([if want fault tolerance])
6573
fi
66-
if test "x$with_ft" != "x" || test "$opal_want_ft" = "1"; then
74+
75+
if test x"$with_ft" != "xno"; then
6776
opal_want_ft=1
6877
opal_want_ft_cr=0
78+
opal_want_ft_mpi=0
6979
opal_want_ft_type=none
7080
7181
as_save_IFS=$IFS
@@ -74,8 +84,16 @@ AC_DEFUN([OPAL_SETUP_FT],[
7484
IFS=$as_save_IFS
7585
7686
# Default value
77-
if test "$opt" = "" || test "$opt" = "yes"; then
78-
opal_want_ft_cr=1
87+
if test "$opt" = "yes"; then
88+
opal_want_ft_mpi=1
89+
elif test "$opt" = "ULFM"; then
90+
opal_want_ft_mpi=1
91+
elif test "$opt" = "ulfm"; then
92+
opal_want_ft_mpi=1
93+
elif test "$opt" = "MPI"; then
94+
opal_want_ft_mpi=1
95+
elif test "$opt" = "mpi"; then
96+
opal_want_ft_mpi=1
7997
elif test "$opt" = "LAM"; then
8098
opal_want_ft_cr=1
8199
elif test "$opt" = "lam"; then
@@ -89,28 +107,35 @@ AC_DEFUN([OPAL_SETUP_FT],[
89107
AC_MSG_ERROR([Cannot continue])
90108
fi
91109
done
92-
if test "$opal_want_ft_cr" = 1; then
110+
if test "$opal_want_ft_mpi" = 1; then
111+
opal_want_ft_type="mpi"
112+
elif test "$opal_want_ft_cr" = 1; then
93113
opal_want_ft_type="cr"
94114
fi
95115
96116
AC_MSG_RESULT([Enabled $opal_want_ft_type (Specified $with_ft)])
97117
AC_MSG_WARN([**************************************************])
98118
AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *])
99-
AC_MSG_WARN([*** a research quality implementation, and care *])
100-
AC_MSG_WARN([*** should be used when choosing to enable it. *])
119+
AC_MSG_WARN([*** compiled-in, but off by default. Use mpiexec *])
120+
AC_MSG_WARN([*** and MCA parameters to turn it on. *])
121+
AC_MSG_WARN([*** Not all components support fault tolerance. *])
101122
AC_MSG_WARN([**************************************************])
102123
else
103124
opal_want_ft=0
125+
opal_want_ft_mpi=0
104126
opal_want_ft_cr=0
105127
if test "$opal_setup_ft_options" = "yes"; then
106128
AC_MSG_RESULT([Disabled fault tolerance])
107129
fi
108130
fi
109131
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT], [$opal_want_ft],
110132
[Enable fault tolerance general components and logic])
133+
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_MPI], [$opal_want_ft_mpi],
134+
[Enable fault tolerance MPI ULFM components and logic])
111135
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_CR], [$opal_want_ft_cr],
112136
[Enable fault tolerance checkpoint/restart components and logic])
113137
AM_CONDITIONAL(WANT_FT, test "$opal_want_ft" = "1")
138+
AM_CONDITIONAL(WANT_FT_MPI, test "$opal_want_ft_mpi" = "1")
114139
AM_CONDITIONAL(WANT_FT_CR, test "$opal_want_ft_cr" = "1")
115140
116141
if test "$opal_setup_ft_options" = "yes"; then
@@ -175,4 +200,5 @@ AC_DEFUN([OPAL_SETUP_FT],[
175200
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_THREAD], [$opal_want_ft_thread],
176201
[Enable fault tolerance thread in Open PAL])
177202
AM_CONDITIONAL(WANT_FT_THREAD, test "$opal_want_ft_thread" = "1")
203+
OPAL_SUMMARY_ADD([[Miscellaneous]],[[Fault Tolerance support]],[unnecessary], [$with_ft])
178204
])

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1108,7 +1108,7 @@ AC_CACHE_SAVE
11081108
# visible again
11091109
#
11101110
###########################################################
1111-
dnl OPAL_SETUP_FT_OPTIONS
1111+
OPAL_SETUP_FT_OPTIONS
11121112
###########################################################
11131113
# The following line is always required as it contains the
11141114
# AC_DEFINE and AM_CONDITIONAL calls that set variables used

contrib/Makefile.am

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
amca_paramdir = $(AMCA_PARAM_SETS_DIR)
2828
dist_amca_param_DATA = amca-param-sets/example.conf
2929

30+
if WANT_FT_MPI
31+
dist_amca_param_DATA += amca-param-sets/ft-mpi
32+
endif # WANT_FT_MPI
33+
3034
if WANT_FT_CR
3135
dist_amca_param_DATA += \
3236
amca-param-sets/ft-enable-cr \

contrib/amca-param-sets/ft-mpi

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#
2+
# Copyright (c) 2020 The University of Tennessee and The University
3+
# of Tennessee Research Foundation. All rights
4+
# reserved.
5+
# $COPYRIGHT$
6+
#
7+
# Additional copyrights may follow
8+
#
9+
# $HEADER$
10+
#
11+
# An Aggregate MCA Parameter Set to setup an environment that can support
12+
# User-Level Failure Mitigation (ULFM) fault tolerance (must also be
13+
# compiled in with --with-ft=mpi).
14+
#
15+
# Usage:
16+
# shell$ mpirun --tune ft-mpi ./app
17+
#
18+
19+
mpi_ft_enable=true
20+
21+
# Since failures are expected, reduce the verbosity of the transport errors
22+
btl_base_warn_peer_error=false
23+
24+
#
25+
# Performance tuning parameters (default shown)
26+
# By default the PRTE failure detector is used (see README.FT.ULFM.md)
27+
#mpi_ft_detector=false
28+
#mpi_ft_detector_thread=false
29+
#mpi_ft_detector_rdma_heartbeat=false
30+
#mpi_ft_detector_period=3.
31+
#mpi_ft_detector_timeout=10.
32+
#
33+
34+
35+
#
36+
# Select only ULFM ready components
37+
# disabling non-tested and known broken components in FT-MPI builds
38+
#
39+
40+
#
41+
# The following frameworks/components are TESTED
42+
# They handle faults and should be preferred when running with FT.
43+
# pml ob1
44+
# btl tcp, self, sm(+xpmem,+cma), ugni, uct
45+
# coll base/basic, tuned, ftagree, libnbc
46+
pml=ob1
47+
threads=pthreads
48+
49+
#
50+
# The following frameworks/components are UNTESTED, but **may** work.
51+
# They should run without faults, and **may** work with faults.
52+
# You may try and report if successful.
53+
# btl ofi, portals4, smcuda, usnic, sm(+knem)
54+
# coll inter, sm, sync, cuda, monitoring
55+
# pml monitoring, v/vprotocol
56+
# We will disable only the components for which good components are known to exist.
57+
btl=^usnic
58+
# older versions of xpmem generate bus errors when the other end is dead.
59+
#btl_sm_single_copy_mechanism=cma
60+
61+
62+
#
63+
# The following frameworks/components are UNTESTED, and probably won't work.
64+
# They should run without faults, and will probably crash/deadlock after a fault.
65+
# You may try at your own risk.
66+
# coll hcoll, portals4
67+
# topo (all)
68+
# osc (all)
69+
# io (all)
70+
# fcoll (all)
71+
# fbtl (all)
72+
# We will disable only the components for which good components are known to exist.
73+
# Other untested components are selectable but will issue a runtime warning at
74+
# initiation if FT is enabled.
75+
coll=^hcoll,portals4
76+
77+
#
78+
# The following frameworks/components are NOT WORKING. Do not enable these with FT.
79+
# mtl (all)
80+
# pml cm, crcpw, ucx
81+
mtl=^ofi,portals4,psm2
82+
# already enforced by pml=ob1 above
83+
#pml=^cm,crcpw,ucx
84+
# already enforced by threads=pthreads above
85+
#threads=^argobots,qthreads
86+
# There is a bug in libevent with the "select" backend that causes an infinite loop
87+
# when an unplanned disconnect happens. Use something else, or bail.
88+
opal_event_include=epoll,devpoll,kqueue,evport,poll
89+

ompi/attribute/attribute_predefined.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2020 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -52,6 +52,9 @@
5252
*
5353
* MPI_WTIME_IS_GLOBAL is set to 0 (a conservative answer).
5454
*
55+
* MPI_FT is set to 0 or 1 (according to OPAL_ENABLE_FT_MPI and
56+
* ompi_ftmpi_enabled)
57+
*
5558
* MPI_APPNUM is set as the result of a GPR subscription.
5659
*
5760
* MPI_LASTUSEDCODE is set to an initial value and is reset every time
@@ -123,7 +126,11 @@ int ompi_attr_create_predefined(void)
123126
OMPI_SUCCESS != (ret = create_win(MPI_WIN_SIZE)) ||
124127
OMPI_SUCCESS != (ret = create_win(MPI_WIN_DISP_UNIT)) ||
125128
OMPI_SUCCESS != (ret = create_win(MPI_WIN_CREATE_FLAVOR)) ||
126-
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL))) {
129+
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL)) ||
130+
#if 1 /* not conditional on OPAL_ENABLE_FT_MPI for ABI */
131+
OMPI_SUCCESS != (ret = create_comm(MPI_FT, true)) ||
132+
#endif /* OPAL_ENABLE_FT_MPI */
133+
0) {
127134
return ret;
128135
}
129136

@@ -133,6 +140,14 @@ int ompi_attr_create_predefined(void)
133140
OMPI_SUCCESS != (ret = set_f(MPI_HOST, MPI_PROC_NULL)) ||
134141
OMPI_SUCCESS != (ret = set_f(MPI_IO, MPI_ANY_SOURCE)) ||
135142
OMPI_SUCCESS != (ret = set_f(MPI_WTIME_IS_GLOBAL, 0)) ||
143+
#if OPAL_ENABLE_FT_MPI
144+
/* Although we always define the key to ease fortran integration,
145+
* lets not set a default value to the attribute if we do not
146+
* have fault tolerance built in. */
147+
OMPI_SUCCESS != (ret = set_f(MPI_FT, ompi_ftmpi_enabled)) ||
148+
#else
149+
OMPI_SUCCESS != (ret = set_f(MPI_FT, false)) ||
150+
#endif /* OPAL_ENABLE_FT_MPI */
136151
OMPI_SUCCESS != (ret = set_f(MPI_LASTUSEDCODE,
137152
ompi_mpi_errcode_lastused))) {
138153
return ret;
@@ -161,6 +176,9 @@ int ompi_attr_free_predefined(void)
161176
OMPI_SUCCESS != (ret = free_comm(MPI_APPNUM)) ||
162177
OMPI_SUCCESS != (ret = free_comm(MPI_LASTUSEDCODE)) ||
163178
OMPI_SUCCESS != (ret = free_comm(MPI_UNIVERSE_SIZE)) ||
179+
#if 1 /* not conditional on OPAL_ENABLE_FT_MPI for ABI */
180+
OMPI_SUCCESS != (ret = free_comm(MPI_FT)) ||
181+
#endif /* OPAL_ENABLE_FT_MPI */
164182
OMPI_SUCCESS != (ret = free_win(MPI_WIN_BASE)) ||
165183
OMPI_SUCCESS != (ret = free_win(MPI_WIN_SIZE)) ||
166184
OMPI_SUCCESS != (ret = free_win(MPI_WIN_DISP_UNIT)) ||

ompi/communicator/Makefile.am

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# Copyright (c) 2004-2020 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -34,3 +34,8 @@ lib@OMPI_LIBMPI_NAME@_la_SOURCES += \
3434
communicator/comm_cid.c \
3535
communicator/comm_request.c
3636

37+
if WANT_FT_MPI
38+
lib@OMPI_LIBMPI_NAME@_la_SOURCES += \
39+
communicator/ft/comm_ft.c communicator/ft/comm_ft_reliable_bcast.c communicator/ft/comm_ft_propagator.c communicator/ft/comm_ft_detector.c communicator/ft/comm_ft_revoke.c
40+
endif # WANT_FT_MPI
41+

0 commit comments

Comments
 (0)