Skip to content

Commit 1a8c351

Browse files
committed
fix incorrect merge with upstream
java: try to dlopen libmpi with the full path. Since OS X 10.11 (aka El Capitan), DYLD_LIBRARY_PATH is no longer propagated to children, so try to dlopen libmpi with the full path using the directory of libmpi_java. Fixes open-mpi#1220. Thanks to Alexander Daryin for reporting this. Update symbol-hiding script. btl/sm: rename file after file descriptor has been closed. Thanks to George for spotting this.
1 parent cc70678 commit 1a8c351

File tree

6 files changed

+62
-78
lines changed

6 files changed

+62
-78
lines changed

config/ompi_setup_java.m4

+3
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ AC_DEFUN([OMPI_SETUP_JAVA_BINDINGS],[
7474
# header file needs this file, so we need to check for
7575
# it/include it in our sources when compiling on Mac).
7676
AC_CHECK_HEADERS([TargetConditionals.h])
77+
78+
# dladdr and Dl_info are required to build the full path to libmpi on OS X 10.11 aka El Capitan
79+
AC_CHECK_TYPES([Dl_info], [], [], [[#include <dlfcn.h>]])
7780
else
7881
AC_MSG_RESULT([no])
7982
WANT_MPI_JAVA_SUPPORT=0

contrib/symbol-hiding.pl

+25-4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
my $mylib;
1313
my $myprefix;
1414
my $mysuffix;
15+
my $mycapprefix;
1516

1617
# Set to true if the script should merely check for symbols in
1718
# the library that are not in the provided output file - useful
@@ -63,6 +64,8 @@ sub quiet_print {
6364

6465
#-------------------------------------------------------------------------------
6566

67+
$mycapprefix = uc $myprefix;
68+
6669
# get the symbol output for this lib
6770
my $output = qx(nm $mylib);
6871

@@ -84,8 +87,7 @@ sub quiet_print {
8487
# next token indicates a public symbol by
8588
# being a 'T', a 'B' or a 'D'
8689
$val = shift(@values);
87-
if ("T" eq $val || "B" eq $val || "D" eq $val ||
88-
"t" eq $val || "b" eq $val || "d" eq $val) {
90+
if ("T" eq $val || "B" eq $val || "D" eq $val) {
8991
$val = shift(@values);
9092
# if this symbol contains a '.', then we
9193
# need to ignore it
@@ -105,10 +107,24 @@ sub quiet_print {
105107
if ($myfile ne "") {
106108
open FILE, ">$myfile" || die "file could not be opened";
107109
}
110+
sub checkCase {
111+
if ($_[0] =~ /^[[:upper:]]/) {
112+
return 1;
113+
}
114+
else {
115+
return 0;
116+
}
117+
}
118+
108119
foreach my $sym (@symbols) {
109120
my $out;
110121
if ($REVERSE) {
111-
$out = "#define " . $myprefix . $sym . $mysuffix;
122+
# if the first char is a cap, then use the cap prefix
123+
if (checkCase($sym)) {
124+
$out = "#define " . $mycapprefix . $sym . $mysuffix;
125+
} else {
126+
$out = "#define " . $myprefix . $sym . $mysuffix;
127+
}
112128
} else {
113129
$out = "#define " . $sym;
114130
}
@@ -119,7 +135,12 @@ sub quiet_print {
119135
if ($REVERSE) {
120136
$out = $out . $sym . "\n";
121137
} else {
122-
$out = $out . $myprefix . $sym . $mysuffix . "\n";
138+
# if the first char is a cap, then use the cap prefix
139+
if (checkCase($sym)) {
140+
$out = $out . $mycapprefix . $sym . $mysuffix . "\n";
141+
} else {
142+
$out = $out . $myprefix . $sym . $mysuffix . "\n";
143+
}
123144
}
124145
if ($myfile ne "") {
125146
print FILE $out;

ompi/mpi/java/c/Makefile.am

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# -*- makefile -*-
22
#
33
# Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
4-
# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
4+
# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
55
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights
66
# reserved.
7+
# Copyright (c) 2015 Research Organization for Information Science
8+
# and Technology (RIST). All rights reserved.
79
# $COPYRIGHT$
810
#
911
# Additional copyrights may follow
@@ -44,7 +46,7 @@ libmpi_java_la_SOURCES = \
4446
mpi_Status.c \
4547
mpi_Win.c
4648

47-
libmpi_java_la_LIBADD = $(top_builddir)/ompi/libmpi.la
49+
libmpi_java_la_LIBADD = -ldl $(top_builddir)/ompi/libmpi.la
4850
libmpi_java_la_LDFLAGS = -version-info $(libmpi_java_so_version)
4951

5052
endif

ompi/mpi/java/c/mpi_MPI.c

+28-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* reserved.
1515
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
1616
* Copyright (c) 2015 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2015 Research Organization for Information Science
18+
* and Technology (RIST). All rights reserved.
1719
* $COPYRIGHT$
1820
*
1921
* Additional copyrights may follow
@@ -62,8 +64,13 @@
6264
#ifdef HAVE_SYS_STAT_H
6365
#include <sys/stat.h>
6466
#endif
67+
#ifdef HAVE_DLFCN_H
6568
#include <dlfcn.h>
69+
#endif
6670
#include <poll.h>
71+
#ifdef HAVE_LIBGEN_H
72+
#include <libgen.h>
73+
#endif
6774

6875
#include "opal/util/output.h"
6976
#include "opal/datatype/opal_convertor.h"
@@ -126,7 +133,27 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved)
126133
{
127134
libmpi = dlopen("libmpi." OPAL_DYN_LIB_SUFFIX, RTLD_NOW | RTLD_GLOBAL);
128135

129-
if(libmpi == NULL)
136+
#if defined(HAVE_DL_INFO) && defined(HAVE_LIBGEN_H)
137+
/*
138+
* OS X El Capitan does not propagate DYLD_LIBRARY_PATH to children any more
139+
* so if previous dlopen failed, try to open libmpi in the same directory
140+
* as the current libmpi_java
141+
*/
142+
if(NULL == libmpi) {
143+
Dl_info info;
144+
if(0 != dladdr((void *)JNI_OnLoad, &info)) {
145+
char libmpipath[OPAL_PATH_MAX];
146+
char *libmpijavapath = strdup(info.dli_fname);
147+
if (NULL != libmpijavapath) {
148+
snprintf(libmpipath, OPAL_PATH_MAX-1, "%s/libmpi." OPAL_DYN_LIB_SUFFIX, dirname(libmpijavapath));
149+
free(libmpijavapath);
150+
libmpi = dlopen(libmpipath, RTLD_NOW | RTLD_GLOBAL);
151+
}
152+
}
153+
}
154+
#endif
155+
156+
if(NULL == libmpi)
130157
{
131158
fprintf(stderr, "Java bindings failed to load libmpi: %s\n",dlerror());
132159
exit(1);

opal/mca/btl/sm/btl_sm_component.c

+2
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,8 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr,
696696
/* only do this for the mpool case */
697697
OBJ_RELEASE(tmp_modp);
698698
}
699+
(void)close(fd);
700+
fd = -1;
699701
if (0 != rename(tmpfname, fname)) {
700702
rc = OPAL_ERR_IN_ERRNO;
701703
goto out;

orte/mca/ess/base/ess_base_std_orted.c

-71
Original file line numberDiff line numberDiff line change
@@ -510,77 +510,6 @@ int orte_ess_base_orted_setup(char **hosts)
510510
goto error;
511511
}
512512
}
513-
/* setup the global job and node arrays */
514-
orte_job_data = OBJ_NEW(opal_pointer_array_t);
515-
if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data,
516-
1,
517-
ORTE_GLOBAL_ARRAY_MAX_SIZE,
518-
1))) {
519-
ORTE_ERROR_LOG(ret);
520-
error = "setup job array";
521-
goto error;
522-
}
523-
orte_node_pool = OBJ_NEW(opal_pointer_array_t);
524-
if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
525-
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
526-
ORTE_GLOBAL_ARRAY_MAX_SIZE,
527-
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
528-
ORTE_ERROR_LOG(ret);
529-
error = "setup node array";
530-
goto error;
531-
}
532-
orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
533-
if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
534-
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
535-
ORTE_GLOBAL_ARRAY_MAX_SIZE,
536-
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
537-
ORTE_ERROR_LOG(ret);
538-
error = "setup node topologies array";
539-
goto error;
540-
}
541-
/* Setup the job data object for the daemons */
542-
/* create and store the job data object */
543-
jdata = OBJ_NEW(orte_job_t);
544-
jdata->jobid = ORTE_PROC_MY_NAME->jobid;
545-
opal_pointer_array_set_item(orte_job_data, 0, jdata);
546-
/* every job requires at least one app */
547-
app = OBJ_NEW(orte_app_context_t);
548-
opal_pointer_array_set_item(jdata->apps, 0, app);
549-
jdata->num_apps++;
550-
/* create and store a node object where we are */
551-
node = OBJ_NEW(orte_node_t);
552-
node->name = strdup(orte_process_info.nodename);
553-
node->index = opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node);
554-
/* point our topology to the one detected locally */
555-
node->topology = opal_hwloc_topology;
556-
557-
/* create and store a proc object for us */
558-
proc = OBJ_NEW(orte_proc_t);
559-
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
560-
proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
561-
proc->pid = orte_process_info.pid;
562-
proc->rml_uri = orte_rml.get_contact_info();
563-
proc->state = ORTE_PROC_STATE_RUNNING;
564-
opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc);
565-
/* record that the daemon (i.e., us) is on this node
566-
* NOTE: we do not add the proc object to the node's
567-
* proc array because we are not an application proc.
568-
* Instead, we record it in the daemon field of the
569-
* node object
570-
*/
571-
OBJ_RETAIN(proc); /* keep accounting straight */
572-
node->daemon = proc;
573-
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
574-
node->state = ORTE_NODE_STATE_UP;
575-
/* now point our proc node field to the node */
576-
OBJ_RETAIN(node); /* keep accounting straight */
577-
proc->node = node;
578-
/* record that the daemon job is running */
579-
jdata->num_procs = 1;
580-
jdata->state = ORTE_JOB_STATE_RUNNING;
581-
/* obviously, we have "reported" */
582-
jdata->num_reported = 1;
583-
584513
/* setup the PMIx framework - ensure it skips all non-PMIx components,
585514
* but do not override anything we were given */
586515
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ);

0 commit comments

Comments
 (0)