Skip to content

Commit 41eb41c

Browse files
committed
Clean up stale code in ORTE/OOB
Remove the code for multiple OOB progress threads, as it is an optimization nobody uses. It also turns out to have a race condition that can cause a segfault on finalize, so it is probably good that nobody is using it. Signed-off-by: Ralph Castain <[email protected]>
1 parent c672a51 commit 41eb41c

9 files changed

+14
-92
lines changed

orte/mca/oob/base/base.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ typedef struct {
6363
opal_list_t actives;
6464
int max_uri_length;
6565
opal_hash_table_t peers;
66-
int num_threads;
6766
#if OPAL_ENABLE_TIMING
6867
bool timing;
6968
#endif

orte/mca/oob/base/oob_base_frame.c

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* reserved.
1616
* Copyright (c) 2015-2016 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18-
* Copyright (c) 2017 Intel, Inc. All rights reserved.
18+
* Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -55,14 +55,6 @@ orte_oob_base_t orte_oob_base = {0};
5555

5656
static int orte_oob_base_register(mca_base_register_flag_t flags)
5757
{
58-
orte_oob_base.num_threads = 0;
59-
(void)mca_base_var_register("orte", "oob", "base", "num_progress_threads",
60-
"Number of independent progress OOB messages for each interface",
61-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
62-
OPAL_INFO_LVL_9,
63-
MCA_BASE_VAR_SCOPE_READONLY,
64-
&orte_oob_base.num_threads);
65-
6658
#if OPAL_ENABLE_TIMING
6759
/* Detailed timing setup */
6860
orte_oob_base.timing = false;
@@ -91,10 +83,6 @@ static int orte_oob_base_close(void)
9183
OBJ_RELEASE(cli);
9284
}
9385

94-
if (!ORTE_PROC_IS_APP && !ORTE_PROC_IS_TOOL) {
95-
opal_progress_thread_finalize("OOB-BASE");
96-
}
97-
9886
/* destruct our internal lists */
9987
OBJ_DESTRUCT(&orte_oob_base.actives);
10088

@@ -122,13 +110,6 @@ static int orte_oob_base_open(mca_base_open_flag_t flags)
122110
opal_hash_table_init(&orte_oob_base.peers, 128);
123111
OBJ_CONSTRUCT(&orte_oob_base.actives, opal_list_t);
124112

125-
if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) {
126-
orte_oob_base.ev_base = orte_event_base;
127-
} else {
128-
orte_oob_base.ev_base = opal_progress_thread_init("OOB-BASE");
129-
}
130-
131-
132113
#if OPAL_ENABLE_FT_CR == 1
133114
/* register the FT events callback */
134115
orte_state.add_job_state(ORTE_JOB_STATE_FT_CHECKPOINT, orte_oob_base_ft_event, ORTE_ERROR_PRI);

orte/mca/oob/tcp/oob_tcp.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -141,12 +141,6 @@ static void ping(const orte_process_name_t *proc)
141141
return;
142142
}
143143

144-
/* has this peer had a progress thread assigned yet? */
145-
if (NULL == peer->ev_base) {
146-
/* nope - assign one */
147-
ORTE_OOB_TCP_NEXT_BASE(peer);
148-
}
149-
150144
/* if we are already connected, there is nothing to do */
151145
if (MCA_OOB_TCP_CONNECTED == peer->state) {
152146
opal_output_verbose(2, orte_oob_base_framework.framework_output,
@@ -204,11 +198,7 @@ static void send_nb(orte_rml_send_t *msg)
204198
__FILE__, __LINE__,
205199
ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num,
206200
ORTE_NAME_PRINT(&peer->name));
207-
/* has this peer had a progress thread assigned yet? */
208-
if (NULL == peer->ev_base) {
209-
/* nope - assign one */
210-
ORTE_OOB_TCP_NEXT_BASE(peer);
211-
}
201+
212202
/* add the msg to the hop's send queue */
213203
if (MCA_OOB_TCP_CONNECTED == peer->state) {
214204
opal_output_verbose(2, orte_oob_base_framework.framework_output,

orte/mca/oob/tcp/oob_tcp_component.c

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,8 @@ mca_oob_tcp_component_t mca_oob_tcp_component = {
145145
*/
146146
static int tcp_component_open(void)
147147
{
148-
mca_oob_tcp_component.next_base = 0;
149148
OBJ_CONSTRUCT(&mca_oob_tcp_component.peers, opal_hash_table_t);
150149
opal_hash_table_init(&mca_oob_tcp_component.peers, 32);
151-
OBJ_CONSTRUCT(&mca_oob_tcp_component.ev_bases, opal_pointer_array_t);
152-
opal_pointer_array_init(&mca_oob_tcp_component.ev_bases,
153-
orte_oob_base.num_threads, 256, 8);
154150

155151
OBJ_CONSTRUCT(&mca_oob_tcp_component.listeners, opal_list_t);
156152
if (ORTE_PROC_IS_HNP) {
@@ -204,8 +200,6 @@ static int tcp_component_close(void)
204200
}
205201
#endif
206202

207-
OBJ_DESTRUCT(&mca_oob_tcp_component.ev_bases);
208-
209203
return ORTE_SUCCESS;
210204
}
211205
static char *static_port_string;
@@ -626,27 +620,11 @@ static int component_available(void)
626620
static int component_startup(void)
627621
{
628622
int rc = ORTE_SUCCESS;
629-
int i;
630-
char *tmp;
631-
opal_event_base_t *evb;
632623

633624
opal_output_verbose(2, orte_oob_base_framework.framework_output,
634625
"%s TCP STARTUP",
635626
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
636627

637-
/* initialize state */
638-
if (0 == orte_oob_base.num_threads) {
639-
opal_pointer_array_add(&mca_oob_tcp_component.ev_bases, orte_oob_base.ev_base);
640-
} else {
641-
for (i=0; i < orte_oob_base.num_threads; i++) {
642-
opal_asprintf(&tmp, "OOB-TCP-%d", i);
643-
evb = opal_progress_thread_init(tmp);
644-
opal_pointer_array_add(&mca_oob_tcp_component.ev_bases, evb);
645-
opal_argv_append_nosize(&mca_oob_tcp_component.ev_threads, tmp);
646-
free(tmp);
647-
}
648-
}
649-
650628
/* if we are a daemon/HNP, or we are a standalone app,
651629
* then it is possible that someone else may initiate a
652630
* connection to us. In these cases, we need to start the
@@ -674,14 +652,6 @@ static void component_shutdown(void)
674652
"%s TCP SHUTDOWN",
675653
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
676654

677-
if (0 < orte_oob_base.num_threads) {
678-
for (i=0; i < orte_oob_base.num_threads; i++) {
679-
opal_progress_thread_finalize(mca_oob_tcp_component.ev_threads[i]);
680-
opal_pointer_array_set_item(&mca_oob_tcp_component.ev_bases, i, NULL);
681-
}
682-
opal_argv_free(mca_oob_tcp_component.ev_threads);
683-
}
684-
685655
if (ORTE_PROC_IS_HNP && mca_oob_tcp_component.listen_thread_active) {
686656
mca_oob_tcp_component.listen_thread_active = false;
687657
/* tell the thread to exit */
@@ -1327,7 +1297,6 @@ static char **split_and_resolve(char **orig_str, char *name)
13271297

13281298
static void peer_cons(mca_oob_tcp_peer_t *peer)
13291299
{
1330-
peer->ev_base = NULL;
13311300
peer->auth_method = NULL;
13321301
peer->sd = -1;
13331302
OBJ_CONSTRUCT(&peer->addrs, opal_list_t);

orte/mca/oob/tcp/oob_tcp_component.h

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
1313
* All rights reserved.
1414
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
15-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -48,9 +48,6 @@ typedef struct {
4848
int max_retries; /**< max number of retries before declaring peer gone */
4949
opal_list_t events; /**< events for monitoring connections */
5050
int peer_limit; /**< max size of tcp peer cache */
51-
opal_pointer_array_t ev_bases; // event base array for progress threads
52-
char** ev_threads; // event progress thread names
53-
int next_base; // counter to load-level thread use
5451
opal_hash_table_t peers; // connection addresses for peers
5552

5653
/* Port specifications */
@@ -96,13 +93,4 @@ ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_failed_to_connect(int fd, short
9693
ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata);
9794
ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata);
9895

99-
#define ORTE_OOB_TCP_NEXT_BASE(p) \
100-
do { \
101-
++mca_oob_tcp_component.next_base; \
102-
if (orte_oob_base.num_threads <= mca_oob_tcp_component.next_base) { \
103-
mca_oob_tcp_component.next_base = 0; \
104-
} \
105-
(p)->ev_base = (opal_event_base_t*)opal_pointer_array_get_item(&mca_oob_tcp_component.ev_bases, mca_oob_tcp_component.next_base); \
106-
} while(0)
107-
10896
#endif /* _MCA_OOB_TCP_COMPONENT_H_ */

orte/mca/oob/tcp/oob_tcp_connection.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -507,10 +507,7 @@ static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer)
507507
{
508508
if (peer->sd >= 0) {
509509
assert(!peer->send_ev_active && !peer->recv_ev_active);
510-
if (NULL == peer->ev_base) {
511-
ORTE_OOB_TCP_NEXT_BASE(peer);
512-
}
513-
opal_event_set(peer->ev_base,
510+
opal_event_set(orte_event_base,
514511
&peer->recv_event,
515512
peer->sd,
516513
OPAL_EV_READ|OPAL_EV_PERSIST,
@@ -522,7 +519,7 @@ static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer)
522519
peer->recv_ev_active = false;
523520
}
524521

525-
opal_event_set(peer->ev_base,
522+
opal_event_set(orte_event_base,
526523
&peer->send_event,
527524
peer->sd,
528525
OPAL_EV_WRITE|OPAL_EV_PERSIST,
@@ -803,7 +800,6 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
803800
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
804801
peer = OBJ_NEW(mca_oob_tcp_peer_t);
805802
peer->name = hdr.origin;
806-
ORTE_OOB_TCP_NEXT_BASE(peer); // assign it an event base
807803
peer->state = MCA_OOB_TCP_ACCEPTING;
808804
ui64 = (uint64_t*)(&peer->name);
809805
if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, (*ui64), peer)) {

orte/mca/oob/tcp/oob_tcp_connection.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
1313
* All rights reserved.
1414
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
15-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -60,14 +60,14 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t);
6060
ORTE_NAME_PRINT((&(p)->name))); \
6161
cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \
6262
cop->peer = (p); \
63-
ORTE_THREADSHIFT(cop, (p)->ev_base, (cbfunc), ORTE_MSG_PRI); \
63+
ORTE_THREADSHIFT(cop, orte_event_base, (cbfunc), ORTE_MSG_PRI); \
6464
} while(0);
6565

6666
#define ORTE_ACTIVATE_TCP_ACCEPT_STATE(s, a, cbfunc) \
6767
do { \
6868
mca_oob_tcp_conn_op_t *cop; \
6969
cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \
70-
opal_event_set(orte_oob_base.ev_base, &cop->ev, s, \
70+
opal_event_set(orte_event_base, &cop->ev, s, \
7171
OPAL_EV_READ, (cbfunc), cop); \
7272
opal_event_set_priority(&cop->ev, ORTE_MSG_PRI); \
7373
ORTE_POST_OBJECT(cop); \
@@ -84,7 +84,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t);
8484
ORTE_NAME_PRINT((&(p)->name))); \
8585
cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \
8686
cop->peer = (p); \
87-
opal_event_evtimer_set((p)->ev_base, \
87+
opal_event_evtimer_set(orte_event_base, \
8888
&cop->ev, \
8989
(cbfunc), cop); \
9090
ORTE_POST_OBJECT(cop); \

orte/mca/oob/tcp/oob_tcp_peer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ typedef struct {
5252
mca_oob_tcp_addr_t *active_addr;
5353
mca_oob_tcp_state_t state;
5454
int num_retries;
55-
opal_event_base_t *ev_base; // progress thread this peer is assigned to
5655
opal_event_t send_event; /**< registration with event thread for send events */
5756
bool send_ev_active;
5857
opal_event_t recv_event; /**< registration with event thread for recv events */
@@ -82,7 +81,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t);
8281
pop = OBJ_NEW(mca_oob_tcp_peer_op_t); \
8382
pop->peer.jobid = (p)->name.jobid; \
8483
pop->peer.vpid = (p)->name.vpid; \
85-
ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \
84+
ORTE_THREADSHIFT(pop, orte_event_base, \
8685
(cbfunc), ORTE_MSG_PRI); \
8786
} while(0);
8887

orte/mca/oob/tcp/oob_tcp_sendrecv.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
8383
do { \
8484
(s)->peer = (struct mca_oob_tcp_peer_t*)(p); \
8585
(s)->activate = (f); \
86-
ORTE_THREADSHIFT((s), (p)->ev_base, \
86+
ORTE_THREADSHIFT((s), orte_event_base, \
8787
mca_oob_tcp_queue_msg, ORTE_MSG_PRI); \
8888
} while(0)
8989

@@ -228,7 +228,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_op_t);
228228
ORTE_NAME_PRINT(&((ms)->dst))); \
229229
mop = OBJ_NEW(mca_oob_tcp_msg_op_t); \
230230
mop->msg = (ms); \
231-
ORTE_THREADSHIFT(mop, (ms)->peer->ev_base, \
231+
ORTE_THREADSHIFT(mop, orte_event_base, \
232232
(cbfunc), ORTE_MSG_PRI); \
233233
} while(0);
234234

@@ -274,7 +274,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t);
274274
mop->hop.jobid = (h)->jobid; \
275275
mop->hop.vpid = (h)->vpid; \
276276
/* this goes to the OOB framework, so use that event base */ \
277-
ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \
277+
ORTE_THREADSHIFT(mop, orte_event_base, \
278278
(cbfunc), ORTE_MSG_PRI); \
279279
} while(0)
280280

@@ -292,7 +292,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t);
292292
mop->hop.vpid = (h)->vpid; \
293293
/* this goes to the component, so use the framework \
294294
* event base */ \
295-
ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \
295+
ORTE_THREADSHIFT(mop, orte_event_base, \
296296
(c), ORTE_MSG_PRI); \
297297
} while(0)
298298

0 commit comments

Comments
 (0)