Skip to content

Commit fa945d9

Browse files
committed
Support fetching dependencies over git+http(s)
Closes #14298

This commit adds support for fetching dependencies over git+http(s) using a minimal implementation of the Git protocols and formats relevant to fetching repository data.

Git URLs can be specified in `build.zig.zon` as follows:

```zig
.xml = .{
    .url = "git+https://github.com/ianprime0509/zig-xml#7380d59d50f1cd8460fd748b5f6f179306679e2f",
    .hash = "122085c1e4045fa9cb69632ff771c56acdb6760f34ca5177e80f70b0b92cd80da3e9",
},
```

The fragment part of the URL may specify a commit ID (SHA1 hash), branch name, or tag. It is an error to omit the fragment: if this happens, the compiler will prompt the user to add it, using the commit ID of the HEAD commit of the repository (that is, the latest commit of the default branch):

```
Fetch Packages... xml... /var/home/ian/src/zig-gobject/build.zig.zon:6:20: error: url field is missing an explicit ref
    .url = "git+https://github.com/ianprime0509/zig-xml",
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
note: try .url = "git+https://github.com/ianprime0509/zig-xml#dfdc044f3271641c7d428dc8ec8cd46423d8b8b6",
```

This implementation currently supports only version 2 of Git's wire protocol (documented in [protocol-v2](https://git-scm.com/docs/protocol-v2)), which was first introduced in Git 2.19 (2018) and made the default in 2.26 (2020).

The wire protocol behaves similarly when used over other transports, such as SSH and the "Git protocol" (git:// URLs), so it should be reasonably straightforward to support fetching dependencies from such URLs if the necessary transports are implemented (e.g. #14295).
1 parent 19a82ff commit fa945d9

File tree

4 files changed

+1549
-5
lines changed

4 files changed

+1549
-5
lines changed

src/Package.zig

Lines changed: 128 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ const Module = @import("Module.zig");
1717
const Cache = std.Build.Cache;
1818
const build_options = @import("build_options");
1919
const Manifest = @import("Manifest.zig");
20+
const git = @import("git.zig");
2021

2122
pub const Table = std.StringHashMapUnmanaged(*Package);
2223

@@ -514,6 +515,7 @@ const FetchLocation = union(enum) {
514515
file: []const u8,
515516
directory: []const u8,
516517
http_request: std.Uri,
518+
git_request: std.Uri,
517519

518520
pub fn init(gpa: Allocator, dep: Manifest.Dependency, root_dir: Compilation.Directory, report: Report) !FetchLocation {
519521
switch (dep.location) {
@@ -524,8 +526,13 @@ const FetchLocation = union(enum) {
524526
};
525527
if (ascii.eqlIgnoreCase(uri.scheme, "file")) {
526528
return report.fail(dep.location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{});
529+
} else if (ascii.eqlIgnoreCase(uri.scheme, "http") or ascii.eqlIgnoreCase(uri.scheme, "https")) {
530+
return .{ .http_request = uri };
531+
} else if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or ascii.eqlIgnoreCase(uri.scheme, "git+https")) {
532+
return .{ .git_request = uri };
533+
} else {
534+
return report.fail(dep.location_tok, "Unsupported URL scheme: {s}", .{uri.scheme});
527535
}
528-
return .{ .http_request = uri };
529536
},
530537
.path => |path| {
531538
if (fs.path.isAbsolute(path)) {
@@ -548,7 +555,7 @@ const FetchLocation = union(enum) {
548555
pub fn deinit(f: *FetchLocation, gpa: Allocator) void {
549556
switch (f.*) {
550557
inline .file, .directory => |path| gpa.free(path),
551-
.http_request => {},
558+
.http_request, .git_request => {},
552559
}
553560
f.* = undefined;
554561
}
@@ -592,6 +599,71 @@ const FetchLocation = union(enum) {
592599
.resource = .{ .http_request = req },
593600
};
594601
},
602+
.git_request => |uri| {
603+
var transport_uri = uri;
604+
transport_uri.scheme = uri.scheme["git+".len..];
605+
var redirect_uri: []u8 = undefined;
606+
var session: git.Session = .{ .transport = http_client, .uri = transport_uri };
607+
session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) {
608+
error.Redirected => {
609+
defer gpa.free(redirect_uri);
610+
return report.fail(dep.location_tok, "Repository moved to {s}", .{redirect_uri});
611+
},
612+
else => |other| return other,
613+
};
614+
615+
const want_oid = want_oid: {
616+
const want_ref = uri.fragment orelse "HEAD";
617+
if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
618+
619+
const want_ref_head = try std.fmt.allocPrint(gpa, "refs/heads/{s}", .{want_ref});
620+
defer gpa.free(want_ref_head);
621+
const want_ref_tag = try std.fmt.allocPrint(gpa, "refs/tags/{s}", .{want_ref});
622+
defer gpa.free(want_ref_tag);
623+
624+
var ref_iterator = try session.listRefs(gpa, .{
625+
.ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
626+
.include_peeled = true,
627+
});
628+
defer ref_iterator.deinit();
629+
while (try ref_iterator.next()) |ref| {
630+
if (mem.eql(u8, ref.name, want_ref) or
631+
mem.eql(u8, ref.name, want_ref_head) or
632+
mem.eql(u8, ref.name, want_ref_tag))
633+
{
634+
break :want_oid ref.peeled orelse ref.oid;
635+
}
636+
}
637+
return report.fail(dep.location_tok, "Ref not found: {s}", .{want_ref});
638+
};
639+
if (uri.fragment == null) {
640+
const file_path = try report.directory.join(gpa, &.{Manifest.basename});
641+
defer gpa.free(file_path);
642+
643+
const eb = report.error_bundle;
644+
const notes_len = 1;
645+
try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{
646+
.tok = dep.location_tok,
647+
.off = 0,
648+
.msg = "url field is missing an explicit ref",
649+
});
650+
const notes_start = try eb.reserveNotes(notes_len);
651+
eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
652+
.msg = try eb.printString("try .url = \"{+/}#{}\",", .{ uri, std.fmt.fmtSliceHexLower(&want_oid) }),
653+
}));
654+
return error.PackageFetchFailed;
655+
}
656+
657+
var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
658+
_ = std.fmt.bufPrint(&want_oid_buf, "{}", .{std.fmt.fmtSliceHexLower(&want_oid)}) catch unreachable;
659+
var fetch_stream = try session.fetch(gpa, &.{&want_oid_buf});
660+
errdefer fetch_stream.deinit();
661+
662+
return .{
663+
.path = try gpa.dupe(u8, &want_oid_buf),
664+
.resource = .{ .git_fetch_stream = fetch_stream },
665+
};
666+
},
595667
.directory => unreachable, // Directories do not require fetching
596668
}
597669
}
@@ -602,6 +674,7 @@ const ReadableResource = struct {
602674
resource: union(enum) {
603675
file: fs.File,
604676
http_request: std.http.Client.Request,
677+
git_fetch_stream: git.Session.FetchStream,
605678
},
606679

607680
/// Unpack the package into the global cache directory.
@@ -617,7 +690,7 @@ const ReadableResource = struct {
617690
pkg_prog_node: *std.Progress.Node,
618691
) !PackageLocation {
619692
switch (rr.resource) {
620-
inline .file, .http_request => |*r| {
693+
inline .file, .http_request, .git_fetch_stream => |*r| {
621694
const s = fs.path.sep_str;
622695
const rand_int = std.crypto.random.int(u64);
623696
const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
@@ -663,6 +736,7 @@ const ReadableResource = struct {
663736
// I have not checked what buffer sizes the xz decompression implementation uses
664737
// by default, so the same logic applies for buffering the reader as for gzip.
665738
.@"tar.xz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.xz),
739+
.git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle),
666740
}
667741

668742
// Unpack completed - stop showing amount as progress
@@ -697,13 +771,15 @@ const ReadableResource = struct {
697771
const FileType = enum {
698772
@"tar.gz",
699773
@"tar.xz",
774+
git_pack,
700775
};
701776

702777
pub fn getSize(rr: ReadableResource) !?u64 {
703778
switch (rr.resource) {
779+
.file => |f| return (try f.metadata()).size(),
704780
// TODO: Handle case of chunked content-length
705781
.http_request => |req| return req.response.content_length,
706-
.file => |f| return (try f.metadata()).size(),
782+
.git_fetch_stream => |stream| return stream.request.response.content_length,
707783
}
708784
}
709785

@@ -734,6 +810,7 @@ const ReadableResource = struct {
734810
return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
735811
} else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type});
736812
},
813+
.git_fetch_stream => return .git_pack,
737814
}
738815
}
739816

@@ -769,6 +846,7 @@ const ReadableResource = struct {
769846
switch (rr.resource) {
770847
.file => |file| file.close(),
771848
.http_request => |*req| req.deinit(),
849+
.git_fetch_stream => |*stream| stream.deinit(),
772850
}
773851
rr.* = undefined;
774852
}
@@ -947,7 +1025,7 @@ fn fetchAndUnpack(
9471025
/// is only intended to be human-readable for progress reporting.
9481026
name_for_prog: []const u8,
9491027
) !DependencyModule {
950-
assert(fetch_location == .file or fetch_location == .http_request);
1028+
assert(fetch_location != .directory);
9511029

9521030
const gpa = http_client.allocator;
9531031

@@ -1024,6 +1102,51 @@ fn unpackTarball(
10241102
});
10251103
}
10261104

1105+
/// Unpack a Git packfile read from `reader` into `out_dir`, leaving only the
/// checked-out files of `want_oid` behind.
///
/// Steps, in order:
///   1. Copy everything from `reader.reader()` into `.git/pkg.pack` under `out_dir`.
///   2. Build `.git/pkg.idx` for that pack with `git.indexPack`.
///   3. Open the pack and index as a `git.Repository` and check out `want_oid`
///      into `out_dir`.
///   4. Delete the temporary `.git` directory.
///
/// `reader` is `anytype`; this function uses `reader.reader()` and
/// `reader.prog_node` on it. `gpa` is passed through to `git.indexPack` and
/// `git.Repository.init`.
///
/// Any failing step returns its error immediately; in that case the final
/// `deleteTree(".git")` is not reached.
fn unpackGitPack(
1106+
gpa: Allocator,
1107+
reader: anytype,
1108+
want_oid: git.Oid,
1109+
out_dir: fs.Dir,
1110+
) !void {
1111+
// The .git directory is used to store the packfile and associated index, but
1112+
// we do not attempt to replicate the exact structure of a real .git
1113+
// directory, since that isn't relevant for fetching a package.
1114+
{
1115+
var pack_dir = try out_dir.makeOpenPath(".git", .{});
1116+
defer pack_dir.close();
1117+
// NOTE(review): `.read = true` is presumably required because the pack is
// read back by `git.indexPack` and `git.Repository.init` below — confirm.
var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
1118+
defer pack_file.close();
1119+
// Copy the whole incoming stream to disk through a 4096-byte static buffer.
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
1120+
try fifo.pump(reader.reader(), pack_file.writer());
1121+
try pack_file.sync();
1122+
1123+
var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
1124+
defer index_file.close();
1125+
{
1126+
// Progress node "Index pack", started from `reader.prog_node`.
var index_prog_node = reader.prog_node.start("Index pack", 0);
1127+
defer index_prog_node.end();
1128+
index_prog_node.activate();
1129+
index_prog_node.context.refresh();
1130+
// Index output goes through a buffered writer; it is flushed and the
// file synced before the index is used by the checkout step.
var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
1131+
try git.indexPack(gpa, pack_file, index_buffered_writer.writer());
1132+
try index_buffered_writer.flush();
1133+
try index_file.sync();
1134+
}
1135+
1136+
{
1137+
// Progress node "Checkout", started from `reader.prog_node`.
var checkout_prog_node = reader.prog_node.start("Checkout", 0);
1138+
defer checkout_prog_node.end();
1139+
checkout_prog_node.activate();
1140+
checkout_prog_node.context.refresh();
1141+
var repository = try git.Repository.init(gpa, pack_file, index_file);
1142+
defer repository.deinit();
1143+
try repository.checkout(out_dir, want_oid);
1144+
}
1145+
}
1146+
1147+
// The enclosing block has ended, so the deferred closes of the pack directory
// and both files have already run before the directory is removed.
try out_dir.deleteTree(".git");
1148+
}
1149+
10271150
const HashedFile = struct {
10281151
fs_path: []const u8,
10291152
normalized_path: []const u8,

0 commit comments

Comments
 (0)