@@ -1767,7 +1767,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
1767
1767
orte_proc_t * child ;
1768
1768
opal_list_t procs_killed ;
1769
1769
orte_proc_t * proc , proctmp ;
1770
- int i , j ;
1770
+ int i , j , ret ;
1771
1771
opal_pointer_array_t procarray , * procptr ;
1772
1772
bool do_cleanup ;
1773
1773
orte_odls_quick_caddy_t * cd ;
@@ -1913,7 +1913,17 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
1913
1913
/* if we are issuing signals, then we need to wait a little
1914
1914
* and send the next in sequence */
1915
1915
if (0 < opal_list_get_size (& procs_killed )) {
1916
- sleep (orte_odls_globals .timeout_before_sigkill );
1916
+ /* Wait a little. Do so in a loop, since sleep() can be interrupted by a
1917
+ * signal (most likely SIGCHLD in this case); it then returns the unslept seconds. */
1918
+ ret = orte_odls_globals .timeout_before_sigkill ;
1919
+ while ( ret > 0 ) {
1920
+ OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
1921
+ "%s Sleep %d sec (total = %d)" ,
1922
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
1923
+ ret , orte_odls_globals .timeout_before_sigkill ));
1924
+ ret = sleep (ret );
1925
+ }
1926
+
1917
1927
/* issue a SIGTERM to all */
1918
1928
OPAL_LIST_FOREACH (cd , & procs_killed , orte_odls_quick_caddy_t ) {
1919
1929
OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
@@ -1922,8 +1932,18 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
1922
1932
ORTE_NAME_PRINT (& cd -> child -> name )));
1923
1933
kill_local (cd -> child -> pid , SIGTERM );
1924
1934
}
1925
- /* wait a little again */
1926
- sleep (orte_odls_globals .timeout_before_sigkill );
1935
+
1936
+ /* Wait a little. Do so in a loop, since sleep() can be interrupted by a
1937
+ * signal (most likely SIGCHLD in this case); it then returns the unslept seconds. */
1938
+ ret = orte_odls_globals .timeout_before_sigkill ;
1939
+ while ( ret > 0 ) {
1940
+ OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
1941
+ "%s Sleep %d sec (total = %d)" ,
1942
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
1943
+ ret , orte_odls_globals .timeout_before_sigkill ));
1944
+ ret = sleep (ret );
1945
+ }
1946
+
1927
1947
/* issue a SIGKILL to all */
1928
1948
OPAL_LIST_FOREACH (cd , & procs_killed , orte_odls_quick_caddy_t ) {
1929
1949
OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
0 commit comments