Skip to content

Commit 1edfb95

Browse files
committed
Updated backoff to spec.
1 parent 01eda53 commit 1edfb95

File tree

11 files changed

+170
-63
lines changed

11 files changed

+170
-63
lines changed

doc/connection-backoff.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ requests) and instead do some form of exponential backoff.
77

88
We have several parameters:
99
1. INITIAL_BACKOFF (how long to wait after the first failure before retrying)
10-
2. MULTIPLIER (factor with which to multiply backoff after a failed retry)
11-
3. MAX_BACKOFF (upper bound on backoff)
12-
4. MIN_CONNECT_TIMEOUT (minimum time we're willing to give a connection to
10+
1. MULTIPLIER (factor with which to multiply backoff after a failed retry)
11+
1. JITTER (by how much to randomize backoffs).
12+
1. MAX_BACKOFF (upper bound on backoff)
13+
1. MIN_CONNECT_TIMEOUT (minimum time we're willing to give a connection to
1314
complete)
1415

1516
## Proposed Backoff Algorithm

src/core/ext/client_channel/subchannel.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -334,16 +334,18 @@ grpc_subchannel *grpc_subchannel_create(grpc_exec_ctx *exec_ctx,
334334
int initial_backoff_ms =
335335
GRPC_SUBCHANNEL_INITIAL_CONNECT_BACKOFF_SECONDS * 1000;
336336
int max_backoff_ms = GRPC_SUBCHANNEL_RECONNECT_MAX_BACKOFF_SECONDS * 1000;
337+
int min_backoff_ms = GRPC_SUBCHANNEL_MIN_CONNECT_TIMEOUT_SECONDS * 1000;
337338
bool fixed_reconnect_backoff = false;
338339
if (c->args) {
339340
for (size_t i = 0; i < c->args->num_args; i++) {
340341
if (0 == strcmp(c->args->args[i].key,
341-
"grpc.testing.fixed_reconnect_backoff")) {
342+
"grpc.testing.fixed_reconnect_backoff_ms")) {
342343
GPR_ASSERT(c->args->args[i].type == GRPC_ARG_INTEGER);
343344
fixed_reconnect_backoff = true;
344-
initial_backoff_ms = max_backoff_ms = grpc_channel_arg_get_integer(
345-
&c->args->args[i],
346-
(grpc_integer_options){initial_backoff_ms, 100, INT_MAX});
345+
initial_backoff_ms = min_backoff_ms = max_backoff_ms =
346+
grpc_channel_arg_get_integer(
347+
&c->args->args[i],
348+
(grpc_integer_options){initial_backoff_ms, 100, INT_MAX});
347349
} else if (0 == strcmp(c->args->args[i].key,
348350
GRPC_ARG_MAX_RECONNECT_BACKOFF_MS)) {
349351
fixed_reconnect_backoff = false;
@@ -360,11 +362,11 @@ grpc_subchannel *grpc_subchannel_create(grpc_exec_ctx *exec_ctx,
360362
}
361363
}
362364
gpr_backoff_init(
363-
&c->backoff_state,
365+
&c->backoff_state, initial_backoff_ms,
364366
fixed_reconnect_backoff ? 1.0
365367
: GRPC_SUBCHANNEL_RECONNECT_BACKOFF_MULTIPLIER,
366368
fixed_reconnect_backoff ? 0.0 : GRPC_SUBCHANNEL_RECONNECT_JITTER,
367-
initial_backoff_ms, max_backoff_ms);
369+
min_backoff_ms, max_backoff_ms);
368370
gpr_mu_init(&c->mu);
369371

370372
return grpc_subchannel_index_register(exec_ctx, key, c);

src/core/ext/lb_policy/grpclb/grpclb.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,11 @@
123123
#include "src/core/lib/surface/channel.h"
124124
#include "src/core/lib/transport/static_metadata.h"
125125

126-
#define BACKOFF_MULTIPLIER 1.6
127-
#define BACKOFF_JITTER 0.2
128-
#define BACKOFF_MIN_SECONDS 10
129-
#define BACKOFF_MAX_SECONDS 60
126+
#define GRPC_GRPCLB_MIN_CONNECT_TIMEOUT_SECONDS 20
127+
#define GRPC_GRPCLB_INITIAL_CONNECT_BACKOFF_SECONDS 1
128+
#define GRPC_GRPCLB_RECONNECT_BACKOFF_MULTIPLIER 1.6
129+
#define GRPC_GRPCLB_RECONNECT_MAX_BACKOFF_SECONDS 120
130+
#define GRPC_GRPCLB_RECONNECT_JITTER 0.2
130131

131132
int grpc_lb_glb_trace = 0;
132133

@@ -1107,9 +1108,12 @@ static void lb_call_init_locked(glb_lb_policy *glb_policy) {
11071108
grpc_closure_init(&glb_policy->lb_on_response_received,
11081109
lb_on_response_received, glb_policy);
11091110

1110-
gpr_backoff_init(&glb_policy->lb_call_backoff_state, BACKOFF_MULTIPLIER,
1111-
BACKOFF_JITTER, BACKOFF_MIN_SECONDS * 1000,
1112-
BACKOFF_MAX_SECONDS * 1000);
1111+
gpr_backoff_init(&glb_policy->lb_call_backoff_state,
1112+
GRPC_GRPCLB_INITIAL_CONNECT_BACKOFF_SECONDS,
1113+
GRPC_GRPCLB_RECONNECT_BACKOFF_MULTIPLIER,
1114+
GRPC_GRPCLB_RECONNECT_JITTER,
1115+
GRPC_GRPCLB_MIN_CONNECT_TIMEOUT_SECONDS * 1000,
1116+
GRPC_GRPCLB_RECONNECT_MAX_BACKOFF_SECONDS * 1000);
11131117
}
11141118

11151119
static void lb_call_destroy_locked(glb_lb_policy *glb_policy) {

src/core/ext/resolver/dns/native/dns_resolver.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,11 @@
4646
#include "src/core/lib/support/backoff.h"
4747
#include "src/core/lib/support/string.h"
4848

49-
#define BACKOFF_MULTIPLIER 1.6
50-
#define BACKOFF_JITTER 0.2
51-
#define BACKOFF_MIN_SECONDS 1
52-
#define BACKOFF_MAX_SECONDS 120
49+
#define GRPC_DNS_MIN_CONNECT_TIMEOUT_SECONDS 1
50+
#define GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS 1
51+
#define GRPC_DNS_RECONNECT_BACKOFF_MULTIPLIER 1.6
52+
#define GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS 120
53+
#define GRPC_DNS_RECONNECT_JITTER 0.2
5354

5455
typedef struct {
5556
/** base class: must be first */
@@ -269,8 +270,11 @@ static grpc_resolver *dns_create(grpc_resolver_args *args,
269270
server_name_arg.value.string = (char *)path;
270271
r->channel_args =
271272
grpc_channel_args_copy_and_add(args->args, &server_name_arg, 1);
272-
gpr_backoff_init(&r->backoff_state, BACKOFF_MULTIPLIER, BACKOFF_JITTER,
273-
BACKOFF_MIN_SECONDS * 1000, BACKOFF_MAX_SECONDS * 1000);
273+
gpr_backoff_init(&r->backoff_state, GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS,
274+
GRPC_DNS_RECONNECT_BACKOFF_MULTIPLIER,
275+
GRPC_DNS_RECONNECT_JITTER,
276+
GRPC_DNS_MIN_CONNECT_TIMEOUT_SECONDS * 1000,
277+
GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS * 1000);
274278
return &r->base;
275279
}
276280

src/core/lib/support/backoff.c

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@
3535

3636
#include <grpc/support/useful.h>
3737

38-
void gpr_backoff_init(gpr_backoff *backoff, double multiplier, double jitter,
38+
void gpr_backoff_init(gpr_backoff *backoff, int64_t initial_connect_timeout,
39+
double multiplier, double jitter,
3940
int64_t min_timeout_millis, int64_t max_timeout_millis) {
41+
backoff->initial_connect_timeout = initial_connect_timeout;
4042
backoff->multiplier = multiplier;
4143
backoff->jitter = jitter;
4244
backoff->min_timeout_millis = min_timeout_millis;
@@ -45,9 +47,10 @@ void gpr_backoff_init(gpr_backoff *backoff, double multiplier, double jitter,
4547
}
4648

4749
gpr_timespec gpr_backoff_begin(gpr_backoff *backoff, gpr_timespec now) {
48-
backoff->current_timeout_millis = backoff->min_timeout_millis;
49-
return gpr_time_add(
50-
now, gpr_time_from_millis(backoff->current_timeout_millis, GPR_TIMESPAN));
50+
backoff->current_timeout_millis = backoff->initial_connect_timeout;
51+
const int64_t first_timeout =
52+
GPR_MAX(backoff->current_timeout_millis, backoff->min_timeout_millis);
53+
return gpr_time_add(now, gpr_time_from_millis(first_timeout, GPR_TIMESPAN));
5154
}
5255

5356
/* Generate a random number between 0 and 1. */
@@ -57,20 +60,28 @@ static double generate_uniform_random_number(uint32_t *rng_state) {
5760
}
5861

5962
gpr_timespec gpr_backoff_step(gpr_backoff *backoff, gpr_timespec now) {
60-
double new_timeout_millis =
63+
const double new_timeout_millis =
6164
backoff->multiplier * (double)backoff->current_timeout_millis;
62-
double jitter_range = backoff->jitter * new_timeout_millis;
63-
double jitter =
65+
backoff->current_timeout_millis =
66+
GPR_MIN((int64_t)new_timeout_millis, backoff->max_timeout_millis);
67+
68+
const double jitter_range_width = backoff->jitter * new_timeout_millis;
69+
const double jitter =
6470
(2 * generate_uniform_random_number(&backoff->rng_state) - 1) *
65-
jitter_range;
71+
jitter_range_width;
72+
6673
backoff->current_timeout_millis =
67-
GPR_CLAMP((int64_t)(new_timeout_millis + jitter),
68-
backoff->min_timeout_millis, backoff->max_timeout_millis);
69-
return gpr_time_add(
74+
(int64_t)((double)(backoff->current_timeout_millis) + jitter);
75+
76+
const gpr_timespec current_deadline = gpr_time_add(
7077
now, gpr_time_from_millis(backoff->current_timeout_millis, GPR_TIMESPAN));
78+
79+
const gpr_timespec min_deadline = gpr_time_add(
80+
now, gpr_time_from_millis(backoff->min_timeout_millis, GPR_TIMESPAN));
81+
82+
return gpr_time_max(current_deadline, min_deadline);
7183
}
7284

7385
void gpr_backoff_reset(gpr_backoff *backoff) {
74-
// forces step() to return a timeout of min_timeout_millis
75-
backoff->current_timeout_millis = 0;
86+
backoff->current_timeout_millis = backoff->initial_connect_timeout;
7687
}

src/core/lib/support/backoff.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@
3737
#include <grpc/support/time.h>
3838

3939
typedef struct {
40-
/// const: multiplier between retry attempts
40+
/// const: how long to wait after the first failure before retrying
41+
int64_t initial_connect_timeout;
42+
/// const: factor with which to multiply backoff after a failed retry
4143
double multiplier;
4244
/// const: amount to randomize backoffs
4345
double jitter;
@@ -54,7 +56,8 @@ typedef struct {
5456
} gpr_backoff;
5557

5658
/// Initialize backoff machinery - does not need to be destroyed
57-
void gpr_backoff_init(gpr_backoff *backoff, double multiplier, double jitter,
59+
void gpr_backoff_init(gpr_backoff *backoff, int64_t initial_connect_timeout,
60+
double multiplier, double jitter,
5861
int64_t min_timeout_millis, int64_t max_timeout_millis);
5962

6063
/// Begin retry loop: returns a timespec for the NEXT retry

test/core/client_channel/lb_policies_test.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ void run_spec(const test_spec *spec) {
481481
gpr_asprintf(&client_hostport, "ipv4:%s", servers_hostports_str);
482482

483483
arg_array[0].type = GRPC_ARG_INTEGER;
484-
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff";
484+
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff_ms";
485485
arg_array[0].value.integer = RETRY_TIMEOUT;
486486
arg_array[1].type = GRPC_ARG_STRING;
487487
arg_array[1].key = GRPC_ARG_LB_POLICY_NAME;
@@ -519,7 +519,7 @@ static grpc_channel *create_client(const servers_fixture *f) {
519519
gpr_asprintf(&client_hostport, "ipv4:%s", servers_hostports_str);
520520

521521
arg_array[0].type = GRPC_ARG_INTEGER;
522-
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff";
522+
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff_ms";
523523
arg_array[0].value.integer = RETRY_TIMEOUT;
524524
arg_array[1].type = GRPC_ARG_STRING;
525525
arg_array[1].key = GRPC_ARG_LB_POLICY_NAME;

test/core/end2end/goaway_server_test.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,16 @@ int main(int argc, char **argv) {
126126

127127
char *addr;
128128

129+
grpc_channel_args client_args;
130+
grpc_arg arg_array[1];
131+
arg_array[0].type = GRPC_ARG_INTEGER;
132+
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff_ms";
133+
arg_array[0].value.integer = 1000;
134+
client_args.args = arg_array;
135+
client_args.num_args = 1;
136+
129137
/* create a channel that picks first amongst the servers */
130-
grpc_channel *chan = grpc_insecure_channel_create("test", NULL, NULL);
138+
grpc_channel *chan = grpc_insecure_channel_create("test", &client_args, NULL);
131139
/* and an initial call to them */
132140
grpc_call *call1 = grpc_channel_create_call(
133141
chan, NULL, GRPC_PROPAGATE_DEFAULTS, cq, "/foo", "127.0.0.1",

test/core/end2end/tests/connectivity.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,15 @@ static void test_connectivity(grpc_end2end_test_config config) {
6868
gpr_thd_options thdopt = gpr_thd_options_default();
6969
gpr_thd_id thdid;
7070

71-
config.init_client(&f, NULL);
71+
grpc_channel_args client_args;
72+
grpc_arg arg_array[1];
73+
arg_array[0].type = GRPC_ARG_INTEGER;
74+
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff_ms";
75+
arg_array[0].value.integer = 1000;
76+
client_args.args = arg_array;
77+
client_args.num_args = 1;
78+
79+
config.init_client(&f, &client_args);
7280

7381
ce.channel = f.client;
7482
ce.cq = f.cq;

test/core/end2end/tests/simple_delayed_request.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,21 +200,36 @@ static void simple_delayed_request_body(grpc_end2end_test_config config,
200200

201201
static void test_simple_delayed_request_short(grpc_end2end_test_config config) {
202202
grpc_end2end_test_fixture f;
203+
grpc_channel_args client_args;
204+
grpc_arg arg_array[1];
205+
arg_array[0].type = GRPC_ARG_INTEGER;
206+
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff_ms";
207+
arg_array[0].value.integer = 1000;
208+
client_args.args = arg_array;
209+
client_args.num_args = 1;
203210

204211
gpr_log(GPR_INFO, "%s/%s", "test_simple_delayed_request_short", config.name);
205212
f = config.create_fixture(NULL, NULL);
206-
simple_delayed_request_body(config, &f, NULL, NULL, 100000);
213+
214+
simple_delayed_request_body(config, &f, &client_args, NULL, 100000);
207215
end_test(&f);
208216
config.tear_down_data(&f);
209217
}
210218

211219
static void test_simple_delayed_request_long(grpc_end2end_test_config config) {
212220
grpc_end2end_test_fixture f;
221+
grpc_channel_args client_args;
222+
grpc_arg arg_array[1];
223+
arg_array[0].type = GRPC_ARG_INTEGER;
224+
arg_array[0].key = "grpc.testing.fixed_reconnect_backoff_ms";
225+
arg_array[0].value.integer = 1000;
226+
client_args.args = arg_array;
227+
client_args.num_args = 1;
213228

214229
gpr_log(GPR_INFO, "%s/%s", "test_simple_delayed_request_long", config.name);
215230
f = config.create_fixture(NULL, NULL);
216231
/* This timeout should be longer than a single retry */
217-
simple_delayed_request_body(config, &f, NULL, NULL, 1500000);
232+
simple_delayed_request_body(config, &f, &client_args, NULL, 1500000);
218233
end_test(&f);
219234
config.tear_down_data(&f);
220235
}

0 commit comments

Comments
 (0)