Skip to content

Commit 0bad878

Browse files
committed
Package.Fetch: retry fetch on error
As noted in ziglang#17472, this should probably be configurable; however, I'm not sure what the best approach for this is, and it's not necessary for an MVP. For now, this is hardcoded to make 4 total attempts, with delays of 200, 800, and 4000 ms respectively between attempts. I've erred on the side of retrying unnecessarily here; there are definitely errors we retry on which really ought to trigger the error immediately. We can work to tighten this up a little in future. Resolves: ziglang#17472
1 parent 7abf9b3 commit 0bad878

File tree

1 file changed

+115
-39
lines changed

1 file changed

+115
-39
lines changed

src/Package/Fetch.zig

Lines changed: 115 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,44 @@ pub const RunError = error{
234234
FetchFailed,
235235
};
236236

237+
const RetryableRunError = RunError || error{
238+
/// An error occurred, but it may be temporary (e.g. due to network conditions).
239+
/// Also, we have not yet retried this fetch `retry_delays.len` many times.
240+
/// No error was added to the bundle. Attempt the fetch again after a short delay.
241+
FetchFailedRetryable,
242+
};
243+
244+
/// The delay between fetch attempts (after retryable errors) in ns.
245+
/// The length of this list is the number of times a fetch will be retried.
246+
const retry_delays = [_]u32{
247+
200 * std.time.ns_per_ms,
248+
800 * std.time.ns_per_ms,
249+
4000 * std.time.ns_per_ms,
250+
};
251+
237252
pub fn run(f: *Fetch) RunError!void {
253+
var attempt_count: u32 = 0;
254+
while (true) {
255+
if (f.runInner(&attempt_count)) |_| {
256+
return;
257+
} else |err| switch (err) {
258+
error.FetchFailedRetryable => {
259+
// The function which triggered this error should have checked and updated attempt_count.
260+
assert(attempt_count > 0);
261+
assert(attempt_count <= retry_delays.len);
262+
263+
// Since we'll be sleeping, make sure the progress is up-to-date.
264+
f.prog_node.context.refresh();
265+
266+
std.time.sleep(retry_delays[attempt_count - 1]);
267+
continue;
268+
},
269+
else => |e| return e,
270+
}
271+
}
272+
}
273+
274+
fn runInner(f: *Fetch, attempt_count: *u32) RetryableRunError!void {
238275
const eb = &f.error_bundle;
239276
const arena = f.arena.allocator();
240277
const gpa = f.arena.child_allocator;
@@ -278,12 +315,12 @@ pub fn run(f: *Fetch) RunError!void {
278315
.path_or_url => |path_or_url| {
279316
if (fs.cwd().openIterableDir(path_or_url, .{})) |dir| {
280317
var resource: Resource = .{ .dir = dir };
281-
return runResource(f, path_or_url, &resource, null);
318+
return runResource(f, path_or_url, &resource, null, attempt_count);
282319
} else |dir_err| {
283320
const file_err = if (dir_err == error.NotDir) e: {
284321
if (fs.cwd().openFile(path_or_url, .{})) |file| {
285322
var resource: Resource = .{ .file = file };
286-
return runResource(f, path_or_url, &resource, null);
323+
return runResource(f, path_or_url, &resource, null, attempt_count);
287324
} else |err| break :e err;
288325
} else dir_err;
289326

@@ -293,8 +330,9 @@ pub fn run(f: *Fetch) RunError!void {
293330
.{ path_or_url, @errorName(file_err), @errorName(uri_err) },
294331
));
295332
};
296-
var resource = try f.initResource(uri);
297-
return runResource(f, uri.path, &resource, null);
333+
334+
var resource = try f.initResource(uri, attempt_count);
335+
return runResource(f, uri.path, &resource, null, attempt_count);
298336
}
299337
},
300338
};
@@ -330,8 +368,8 @@ pub fn run(f: *Fetch) RunError!void {
330368
f.location_tok,
331369
try eb.printString("invalid URI: {s}", .{@errorName(err)}),
332370
);
333-
var resource = try f.initResource(uri);
334-
return runResource(f, uri.path, &resource, remote.hash);
371+
var resource = try f.initResource(uri, attempt_count);
372+
return runResource(f, uri.path, &resource, remote.hash, attempt_count);
335373
}
336374

337375
pub fn deinit(f: *Fetch) void {
@@ -345,7 +383,8 @@ fn runResource(
345383
uri_path: []const u8,
346384
resource: *Resource,
347385
remote_hash: ?Manifest.MultiHashHexDigest,
348-
) RunError!void {
386+
attempt_count: *u32,
387+
) RetryableRunError!void {
349388
defer resource.deinit();
350389
const arena = f.arena.allocator();
351390
const eb = &f.error_bundle;
@@ -371,7 +410,7 @@ fn runResource(
371410
};
372411
defer tmp_directory.handle.close();
373412

374-
try unpackResource(f, resource, uri_path, tmp_directory);
413+
try unpackResource(f, resource, uri_path, tmp_directory, attempt_count);
375414

376415
// Load, parse, and validate the unpacked build.zig.zon file. It is allowed
377416
// for the file to be missing, in which case this fetched package is
@@ -707,6 +746,14 @@ fn fail(f: *Fetch, msg_tok: std.zig.Ast.TokenIndex, msg_str: u32) RunError {
707746
return error.FetchFailed;
708747
}
709748

749+
fn failRetryable(f: *Fetch, msg_tok: std.zig.Ast.TokenIndex, comptime msg_format: []const u8, msg_args: anytype, attempt_count: *u32) RetryableRunError {
750+
if (attempt_count.* == retry_delays.len) {
751+
return f.fail(msg_tok, try f.error_bundle.printString(msg_format, msg_args));
752+
}
753+
attempt_count.* += 1;
754+
return error.FetchFailedRetryable;
755+
}
756+
710757
const Resource = union(enum) {
711758
file: fs.File,
712759
http_request: std.http.Client.Request,
@@ -797,7 +844,7 @@ const FileType = enum {
797844
}
798845
};
799846

800-
fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
847+
fn initResource(f: *Fetch, uri: std.Uri, attempt_count: *u32) RetryableRunError!Resource {
801848
const gpa = f.arena.child_allocator;
802849
const arena = f.arena.allocator();
803850
const eb = &f.error_bundle;
@@ -819,31 +866,39 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
819866
defer h.deinit();
820867

821868
var req = http_client.request(.GET, uri, h, .{}) catch |err| {
822-
return f.fail(f.location_tok, try eb.printString(
869+
return f.failRetryable(
870+
f.location_tok,
823871
"unable to connect to server: {s}",
824872
.{@errorName(err)},
825-
));
873+
attempt_count,
874+
);
826875
};
827876
errdefer req.deinit(); // releases more than memory
828877

829878
req.start(.{}) catch |err| {
830-
return f.fail(f.location_tok, try eb.printString(
879+
return f.failRetryable(
880+
f.location_tok,
831881
"HTTP request failed: {s}",
832882
.{@errorName(err)},
833-
));
883+
attempt_count,
884+
);
834885
};
835886
req.wait() catch |err| {
836-
return f.fail(f.location_tok, try eb.printString(
887+
return f.failRetryable(
888+
f.location_tok,
837889
"invalid HTTP response: {s}",
838890
.{@errorName(err)},
839-
));
891+
attempt_count,
892+
);
840893
};
841894

842895
if (req.response.status != .ok) {
843-
return f.fail(f.location_tok, try eb.printString(
896+
return f.failRetryable(
897+
f.location_tok,
844898
"bad HTTP response code: '{d} {s}'",
845899
.{ @intFromEnum(req.response.status), req.response.status.phrase() orelse "" },
846-
));
900+
attempt_count,
901+
);
847902
}
848903

849904
return .{ .http_request = req };
@@ -859,16 +914,19 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
859914
session.discoverCapabilities(gpa, &redirect_uri) catch |err| switch (err) {
860915
error.Redirected => {
861916
defer gpa.free(redirect_uri);
917+
// We got a valid response, so this wasn't a network issue, so this is not retryable.
862918
return f.fail(f.location_tok, try eb.printString(
863919
"repository moved to {s}",
864920
.{redirect_uri},
865921
));
866922
},
867923
else => |e| {
868-
return f.fail(f.location_tok, try eb.printString(
924+
return f.failRetryable(
925+
f.location_tok,
869926
"unable to discover remote git server capabilities: {s}",
870927
.{@errorName(e)},
871-
));
928+
attempt_count,
929+
);
872930
},
873931
};
874932

@@ -883,17 +941,21 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
883941
.ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
884942
.include_peeled = true,
885943
}) catch |err| {
886-
return f.fail(f.location_tok, try eb.printString(
944+
return f.failRetryable(
945+
f.location_tok,
887946
"unable to list refs: {s}",
888947
.{@errorName(err)},
889-
));
948+
attempt_count,
949+
);
890950
};
891951
defer ref_iterator.deinit();
892952
while (ref_iterator.next() catch |err| {
893-
return f.fail(f.location_tok, try eb.printString(
953+
return f.failRetryable(
954+
f.location_tok,
894955
"unable to iterate refs: {s}",
895956
.{@errorName(err)},
896-
));
957+
attempt_count,
958+
);
897959
}) |ref| {
898960
if (std.mem.eql(u8, ref.name, want_ref) or
899961
std.mem.eql(u8, ref.name, want_ref_head) or
@@ -902,6 +964,7 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
902964
break :want_oid ref.peeled orelse ref.oid;
903965
}
904966
}
967+
// We successfully iterated the refs, so this wasn't a network issue, so this is not retryable.
905968
return f.fail(f.location_tok, try eb.printString("ref not found: {s}", .{want_ref}));
906969
};
907970
if (uri.fragment == null) {
@@ -925,10 +988,12 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
925988
std.fmt.fmtSliceHexLower(&want_oid),
926989
}) catch unreachable;
927990
var fetch_stream = session.fetch(gpa, &.{&want_oid_buf}) catch |err| {
928-
return f.fail(f.location_tok, try eb.printString(
991+
return f.failRetryable(
992+
f.location_tok,
929993
"unable to create fetch stream: {s}",
930994
.{@errorName(err)},
931-
));
995+
attempt_count,
996+
);
932997
};
933998
errdefer fetch_stream.deinit();
934999

@@ -949,7 +1014,8 @@ fn unpackResource(
9491014
resource: *Resource,
9501015
uri_path: []const u8,
9511016
tmp_directory: Cache.Directory,
952-
) RunError!void {
1017+
attempt_count: *u32,
1018+
) RetryableRunError!void {
9531019
const eb = &f.error_bundle;
9541020
const file_type = switch (resource.*) {
9551021
.file => FileType.fromPath(uri_path) orelse
@@ -1010,16 +1076,19 @@ fn unpackResource(
10101076
};
10111077

10121078
switch (file_type) {
1013-
.tar => try unpackTarball(f, tmp_directory.handle, resource.reader()),
1014-
.@"tar.gz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.gzip),
1015-
.@"tar.xz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.xz),
1079+
.tar => try unpackTarball(f, tmp_directory.handle, resource.reader(), attempt_count),
1080+
.@"tar.gz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.gzip, attempt_count),
1081+
.@"tar.xz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.xz, attempt_count),
10161082
.git_pack => unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) {
10171083
error.FetchFailed => return error.FetchFailed,
10181084
error.OutOfMemory => return error.OutOfMemory,
1019-
else => |e| return f.fail(f.location_tok, try eb.printString(
1085+
// TODO: don't mark actual git errors as retryable. We're only interested in reader errors.
1086+
else => |e| return f.failRetryable(
1087+
f.location_tok,
10201088
"unable to unpack git files: {s}",
10211089
.{@errorName(e)},
1022-
)),
1090+
attempt_count,
1091+
),
10231092
},
10241093
}
10251094
}
@@ -1029,7 +1098,8 @@ fn unpackTarballCompressed(
10291098
out_dir: fs.Dir,
10301099
resource: *Resource,
10311100
comptime Compression: type,
1032-
) RunError!void {
1101+
attempt_count: *u32,
1102+
) RetryableRunError!void {
10331103
const gpa = f.arena.child_allocator;
10341104
const eb = &f.error_bundle;
10351105
const reader = resource.reader();
@@ -1043,10 +1113,10 @@ fn unpackTarballCompressed(
10431113
};
10441114
defer decompress.deinit();
10451115

1046-
return unpackTarball(f, out_dir, decompress.reader());
1116+
return unpackTarball(f, out_dir, decompress.reader(), attempt_count);
10471117
}
10481118

1049-
fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void {
1119+
fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype, attempt_count: *u32) RetryableRunError!void {
10501120
const eb = &f.error_bundle;
10511121
const gpa = f.arena.child_allocator;
10521122

@@ -1063,10 +1133,15 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void {
10631133
// bit on Windows from the ACLs (see the isExecutable function).
10641134
.mode_mode = .ignore,
10651135
.exclude_empty_directories = true,
1066-
}) catch |err| return f.fail(f.location_tok, try eb.printString(
1067-
"unable to unpack tarball to temporary directory: {s}",
1068-
.{@errorName(err)},
1069-
));
1136+
}) catch |err| {
1137+
// TODO: don't mark actual tar errors as retryable. We're only interested in reader errors.
1138+
return f.failRetryable(
1139+
f.location_tok,
1140+
"unable to unpack tarball to temporary directory: {s}",
1141+
.{@errorName(err)},
1142+
attempt_count,
1143+
);
1144+
};
10701145

10711146
if (diagnostics.errors.items.len > 0) {
10721147
const notes_len: u32 = @intCast(diagnostics.errors.items.len);
@@ -1163,7 +1238,8 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!void
11631238
},
11641239
}
11651240
}
1166-
return error.InvalidGitPack;
1241+
// Use FetchFailed since we already added an error.
1242+
return error.FetchFailed;
11671243
}
11681244
}
11691245
}

0 commit comments

Comments
 (0)