Skip to content

Commit 106109a

Browse files
authored
Merge pull request #7043 from jsquyres/pr/v4.0.x/usnic-fixes-and-optimizations
v4.0.x: usnic fixes and optimizations
2 parents cb5f4e7 + c659282 commit 106109a

File tree

5 files changed

+50
-28
lines changed

5 files changed

+50
-28
lines changed

opal/mca/btl/usnic/btl_usnic.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ extern uint64_t opal_btl_usnic_ticks;
6868
extern opal_recursive_mutex_t btl_usnic_lock;
6969

7070
static inline uint64_t
71-
get_nsec(void)
71+
get_ticks(void)
7272
{
7373
return opal_btl_usnic_ticks;
7474
}
@@ -206,6 +206,14 @@ typedef struct opal_btl_usnic_component_t {
206206
/** retrans characteristics */
207207
int retrans_timeout;
208208

209+
/** max number of messages re-sent during a single progress
210+
iteration */
211+
int max_resends_per_iteration;
212+
213+
/** minimum number of times through component progress before
214+
checking to see if standalone ACKs need to be sent */
215+
int ack_iteration_delay;
216+
209217
/** transport header length for all usNIC devices on this server
210218
(it is guaranteed that all usNIC devices on a single server
211219
will have the same underlying transport, and therefore the

opal/mca/btl/usnic/btl_usnic_component.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
384384

385385
static void usnic_clock_callback(int fd, short flags, void *timeout)
386386
{
387-
/* 1ms == 1,000,000 ns */
388-
opal_btl_usnic_ticks += 1000000;
387+
/* Increase by so many ticks that we will definitely force sending
388+
any ACKs that are pending */
389+
opal_btl_usnic_ticks += 1000;
389390

390391
/* run progress to make sure time change gets noticed */
391392
usnic_component_progress();
@@ -1132,7 +1133,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
11321133
*/
11331134
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
11341135
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
1135-
static int usnic_component_progress_2(void);
1136+
static int usnic_component_progress_2(bool check_priority);
11361137
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
11371138
opal_btl_usnic_channel_t *channel, int cq_ret);
11381139

@@ -1145,9 +1146,7 @@ static int usnic_component_progress(void)
11451146
struct fi_cq_entry completion;
11461147
opal_btl_usnic_channel_t *channel;
11471148
static bool fastpath_ok = true;
1148-
1149-
/* update our simulated clock */
1150-
opal_btl_usnic_ticks += 5000;
1149+
bool check_priority = true;
11511150

11521151
count = 0;
11531152
if (fastpath_ok) {
@@ -1180,10 +1179,11 @@ static int usnic_component_progress(void)
11801179
usnic_handle_cq_error(module, channel, ret);
11811180
}
11821181
}
1182+
check_priority = false;
11831183
}
11841184

11851185
fastpath_ok = true;
1186-
return count + usnic_component_progress_2();
1186+
return count + usnic_component_progress_2(check_priority);
11871187
}
11881188

11891189
static int usnic_handle_completion(
@@ -1304,7 +1304,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
13041304
}
13051305
}
13061306

1307-
static int usnic_component_progress_2(void)
1307+
static int usnic_component_progress_2(bool check_priority)
13081308
{
13091309
int i, j, count = 0, num_events, ret;
13101310
opal_btl_usnic_module_t* module;
@@ -1313,15 +1313,18 @@ static int usnic_component_progress_2(void)
13131313
int rc;
13141314
int c;
13151315

1316-
/* update our simulated clock */
1317-
opal_btl_usnic_ticks += 5000;
1316+
opal_btl_usnic_ticks += 1;
1317+
1318+
/* If we need to check priority, start with the priority channel.
1319+
Otherwise, just check the data channel. */
1320+
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
13181321

13191322
/* Poll for completions */
13201323
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
13211324
module = mca_btl_usnic_component.usnic_active_modules[i];
13221325

13231326
/* poll each channel */
1324-
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
1327+
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
13251328
channel = &module->mod_channels[c];
13261329

13271330
if (channel->chan_deferred_recv != NULL) {

opal/mca/btl/usnic/btl_usnic_mca.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,14 @@ int opal_btl_usnic_component_register(void)
260260
5000, &mca_btl_usnic_component.retrans_timeout,
261261
REGINT_GE_ONE, OPAL_INFO_LVL_5));
262262

263+
CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
264+
16, &mca_btl_usnic_component.max_resends_per_iteration,
265+
REGINT_GE_ONE, OPAL_INFO_LVL_5));
266+
267+
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
268+
4, &mca_btl_usnic_component.ack_iteration_delay,
269+
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
270+
263271
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
264272
0, &max_tiny_msg_size,
265273
REGINT_GE_ZERO, OPAL_INFO_LVL_5));

opal/mca/btl/usnic/btl_usnic_module.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -963,11 +963,12 @@ usnic_do_resends(
963963
opal_btl_usnic_send_segment_t *sseg;
964964
opal_btl_usnic_endpoint_t *endpoint;
965965
struct opal_btl_usnic_channel_t *data_channel;
966-
int ret;
966+
int ret, count;
967967

968968
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
969969

970-
while ((get_send_credits(data_channel) > 1) &&
970+
count = mca_btl_usnic_component.max_resends_per_iteration;
971+
while (count > 0 && (get_send_credits(data_channel) > 1) &&
971972
!opal_list_is_empty(&module->pending_resend_segs)) {
972973

973974
/*
@@ -1009,6 +1010,8 @@ usnic_do_resends(
10091010
BTL_ERROR(("hotel checkin failed\n"));
10101011
abort(); /* should not be possible */
10111012
}
1013+
1014+
--count;
10121015
}
10131016
}
10141017

@@ -1236,7 +1239,7 @@ opal_btl_usnic_module_progress_sends(
12361239

12371240
/* Is it time to send ACK? */
12381241
if (endpoint->endpoint_acktime == 0 ||
1239-
endpoint->endpoint_acktime <= get_nsec()) {
1242+
endpoint->endpoint_acktime <= get_ticks()) {
12401243
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
12411244
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
12421245
} else {
@@ -2366,14 +2369,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
23662369
uint32_t segsize;
23672370

23682371
segsize = (module->local_modex.max_msg_size +
2369-
opal_cache_line_size - 1) &
2372+
mca_btl_usnic_component.prefix_send_offset +
2373+
opal_cache_line_size - 1) &
23702374
~(opal_cache_line_size - 1);
23712375

23722376
/* Send frags freelists */
23732377
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
23742378
rc = usnic_compat_free_list_init(&module->small_send_frags,
2375-
sizeof(opal_btl_usnic_small_send_frag_t) +
2376-
mca_btl_usnic_component.prefix_send_offset,
2379+
sizeof(opal_btl_usnic_small_send_frag_t),
23772380
opal_cache_line_size,
23782381
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
23792382
segsize,
@@ -2390,8 +2393,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
23902393

23912394
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
23922395
rc = usnic_compat_free_list_init(&module->large_send_frags,
2393-
sizeof(opal_btl_usnic_large_send_frag_t) +
2394-
mca_btl_usnic_component.prefix_send_offset,
2396+
sizeof(opal_btl_usnic_large_send_frag_t),
23952397
opal_cache_line_size,
23962398
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
23972399
0, /* payload size */
@@ -2408,8 +2410,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
24082410

24092411
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
24102412
rc = usnic_compat_free_list_init(&module->put_dest_frags,
2411-
sizeof(opal_btl_usnic_put_dest_frag_t) +
2412-
mca_btl_usnic_component.prefix_send_offset,
2413+
sizeof(opal_btl_usnic_put_dest_frag_t),
24132414
opal_cache_line_size,
24142415
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
24152416
0, /* payload size */
@@ -2427,8 +2428,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
24272428
/* list of segments to use for sending */
24282429
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
24292430
rc = usnic_compat_free_list_init(&module->chunk_segs,
2430-
sizeof(opal_btl_usnic_chunk_segment_t) +
2431-
mca_btl_usnic_component.prefix_send_offset,
2431+
sizeof(opal_btl_usnic_chunk_segment_t),
24322432
opal_cache_line_size,
24332433
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
24342434
segsize,
@@ -2446,11 +2446,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
24462446
/* ACK segments freelist */
24472447
uint32_t ack_segment_len;
24482448
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
2449+
mca_btl_usnic_component.prefix_send_offset +
24492450
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
24502451
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
24512452
rc = usnic_compat_free_list_init(&module->ack_segs,
2452-
sizeof(opal_btl_usnic_ack_segment_t) +
2453-
mca_btl_usnic_component.prefix_send_offset,
2453+
sizeof(opal_btl_usnic_ack_segment_t),
24542454
opal_cache_line_size,
24552455
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
24562456
ack_segment_len,

opal/mca/btl/usnic/btl_usnic_recv.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
112112
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
113113
}
114114

115-
/* give this process a chance to send something before ACKing */
115+
/* A heuristic: set to send this ACK after we have checked our
116+
incoming DATA_CHANNEL component.ack_iteration_delay times
117+
(i.e., so we can piggyback an ACK on an outgoing send) */
116118
if (0 == endpoint->endpoint_acktime) {
117-
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
119+
endpoint->endpoint_acktime =
120+
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
118121
}
119122

120123
/* Save this incoming segment in the received segments array on the

0 commit comments

Comments
 (0)