From b3cad98534a4a4406d848f7cbd28165ca005bc8a Mon Sep 17 00:00:00 2001 From: Adam Goertz Date: Wed, 12 Jul 2023 02:45:51 +0000 Subject: [PATCH 1/5] Support file:/// URIs and relative paths --- build.zig | 2 + lib/std/Uri.zig | 45 ++- lib/std/os/windows.zig | 1 + lib/std/os/windows/shlwapi.zig | 13 + src/Manifest.zig | 44 ++- src/Package.zig | 601 ++++++++++++++++++++++----------- 6 files changed, 498 insertions(+), 208 deletions(-) create mode 100644 lib/std/os/windows/shlwapi.zig diff --git a/build.zig b/build.zig index 8777060a89df..c4e9d1937304 100644 --- a/build.zig +++ b/build.zig @@ -336,6 +336,7 @@ pub fn build(b: *std.Build) !void { artifact.linkSystemLibrary("version"); artifact.linkSystemLibrary("uuid"); artifact.linkSystemLibrary("ole32"); + artifact.linkSystemLibrary("shlwapi"); } } } @@ -712,6 +713,7 @@ fn addStaticLlvmOptionsToExe(exe: *std.Build.Step.Compile) !void { exe.linkSystemLibrary("version"); exe.linkSystemLibrary("uuid"); exe.linkSystemLibrary("ole32"); + exe.linkSystemLibrary("shlwapi"); } } diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig index b27a3d70126d..e2c23e5a1dc5 100644 --- a/lib/std/Uri.zig +++ b/lib/std/Uri.zig @@ -134,6 +134,7 @@ pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort }; /// original `text`. Each component that is provided, will be non-`null`. pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { var reader = SliceReader{ .slice = text }; + var uri = Uri{ .scheme = "", .user = null, @@ -145,13 +146,14 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { .fragment = null, }; - if (reader.peekPrefix("//")) { // authority part + if (reader.peekPrefix("//")) a: { // authority part std.debug.assert(reader.get().? == '/'); std.debug.assert(reader.get().? 
== '/'); - const authority = reader.readUntil(isAuthoritySeparator); - if (authority.len == 0) - return error.InvalidFormat; + var authority = reader.readUntil(isAuthoritySeparator); + if (authority.len == 0) { + if (reader.peekPrefix("/")) break :a else return error.InvalidFormat; + } var start_of_host: usize = 0; if (std.mem.indexOf(u8, authority, "@")) |index| { @@ -224,7 +226,6 @@ pub fn format( try writer.writeAll(":"); if (uri.host) |host| { try writer.writeAll("//"); - if (uri.user) |user| { try writer.writeAll(user); if (uri.password) |password| { @@ -486,6 +487,23 @@ test "should fail gracefully" { try std.testing.expectEqual(@as(ParseError!Uri, error.InvalidFormat), parse("foobar://")); } +test "file" { + const parsed = try parse("file:///"); + try std.testing.expectEqualSlices(u8, "file", parsed.scheme); + try std.testing.expectEqual(@as(?[]const u8, null), parsed.host); + try std.testing.expectEqualSlices(u8, "/", parsed.path); + + const parsed2 = try parse("file:///an/absolute/path/to/something"); + try std.testing.expectEqualSlices(u8, "file", parsed2.scheme); + try std.testing.expectEqual(@as(?[]const u8, null), parsed2.host); + try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/something", parsed2.path); + + const parsed3 = try parse("file://localhost/an/absolute/path/to/another/thing/"); + try std.testing.expectEqualSlices(u8, "file", parsed3.scheme); + try std.testing.expectEqualSlices(u8, "localhost", parsed3.host.?); + try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/another/thing/", parsed3.path); +} + test "scheme" { try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme); try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme); @@ -695,3 +713,20 @@ test "URI query escaping" { defer std.testing.allocator.free(formatted_uri); try std.testing.expectEqualStrings("/?response-content-type=application%2Foctet-stream", formatted_uri); } + +test "format" { + const uri = Uri{ + 
.scheme = "file", + .user = null, + .password = null, + .host = null, + .port = null, + .path = "/foo/bar/baz", + .query = null, + .fragment = null, + }; + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try uri.format("+/", .{}, buf.writer()); + try std.testing.expectEqualSlices(u8, "file:/foo/bar/baz", buf.items); +} diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index d40fee8db241..3522f238ecec 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -30,6 +30,7 @@ pub const gdi32 = @import("windows/gdi32.zig"); pub const winmm = @import("windows/winmm.zig"); pub const crypt32 = @import("windows/crypt32.zig"); pub const nls = @import("windows/nls.zig"); +pub const shlwapi = @import("windows/shlwapi.zig"); pub const self_process_handle = @as(HANDLE, @ptrFromInt(maxInt(usize))); diff --git a/lib/std/os/windows/shlwapi.zig b/lib/std/os/windows/shlwapi.zig new file mode 100644 index 000000000000..0f0ceed57605 --- /dev/null +++ b/lib/std/os/windows/shlwapi.zig @@ -0,0 +1,13 @@ +const std = @import("../../std.zig"); +const windows = std.os.windows; + +const DWORD = windows.DWORD; +const WINAPI = windows.WINAPI; +const HRESULT = windows.HRESULT; +const LPCSTR = windows.LPCSTR; +const LPSTR = windows.LPSTR; +const LPWSTR = windows.LPWSTR; +const LPCWSTR = windows.LPCWSTR; + +pub extern "shlwapi" fn PathCreateFromUrlW(pszUrl: LPCWSTR, pszPath: LPWSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT; +pub extern "shlwapi" fn PathCreateFromUrlA(pszUrl: LPCSTR, pszPath: LPSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT; diff --git a/src/Manifest.zig b/src/Manifest.zig index 199663556d4e..2ff54e613266 100644 --- a/src/Manifest.zig +++ b/src/Manifest.zig @@ -2,8 +2,11 @@ pub const basename = "build.zig.zon"; pub const Hash = std.crypto.hash.sha2.Sha256; pub const Dependency = struct { - url: []const u8, - url_tok: Ast.TokenIndex, + location: union(enum) { + url: []const u8, + path: []const 
u8, + }, + location_tok: Ast.TokenIndex, hash: ?[]const u8, hash_tok: Ast.TokenIndex, }; @@ -218,12 +221,12 @@ const Parse = struct { }; var dep: Dependency = .{ - .url = undefined, - .url_tok = undefined, + .location = undefined, + .location_tok = undefined, .hash = null, .hash_tok = undefined, }; - var have_url = false; + var has_location = false; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -232,12 +235,29 @@ const Parse = struct { // things manually provides an opportunity to do any additional verification // that is desirable on a per-field basis. if (mem.eql(u8, field_name, "url")) { - dep.url = parseString(p, field_init) catch |err| switch (err) { - error.ParseFailure => continue, - else => |e| return e, + if (has_location) { + return fail(p, main_tokens[field_init], "dependency should specify only one of 'url' and 'path' fields.", .{}); + } + dep.location = .{ + .url = parseString(p, field_init) catch |err| switch (err) { + error.ParseFailure => continue, + else => |e| return e, + }, + }; + has_location = true; + dep.location_tok = main_tokens[field_init]; + } else if (mem.eql(u8, field_name, "path")) { + if (has_location) { + return fail(p, main_tokens[field_init], "dependency should specify only one of 'url' and 'path' fields.", .{}); + } + dep.location = .{ + .path = parseString(p, field_init) catch |err| switch (err) { + error.ParseFailure => continue, + else => |e| return e, + }, }; - dep.url_tok = main_tokens[field_init]; - have_url = true; + has_location = true; + dep.location_tok = main_tokens[field_init]; } else if (mem.eql(u8, field_name, "hash")) { dep.hash = parseHash(p, field_init) catch |err| switch (err) { error.ParseFailure => continue, @@ -250,8 +270,8 @@ const Parse = struct { } } - if (!have_url) { - try appendError(p, main_tokens[node], "dependency is missing 'url' field", .{}); + if (!has_location) { + try appendError(p, main_tokens[node], "dependency requires location field, one of 
'url' or 'path'.", .{}); } return dep; diff --git a/src/Package.zig b/src/Package.zig index d170baeae509..2429afd07371 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -316,56 +316,51 @@ pub fn fetchAndAddDependencies( for (manifest.dependencies.keys(), 0..) |name, i| { const dep = deps_list[i]; - const sub = try fetchAndUnpack( - thread_pool, - http_client, + const sub_pkg = try getCachedPackage( + http_client.allocator, global_cache_directory, dep, report, all_modules, root_prog_node, - name, - ); + ) orelse m: { + const mod = try fetchAndUnpack( + thread_pool, + http_client, + directory, + global_cache_directory, + dep, + report, + all_modules, + root_prog_node, + name, + ); - if (sub.mod) |mod| { - if (!sub.found_existing) { - try mod.fetchAndAddDependencies( - deps_pkg, - arena, - thread_pool, - http_client, - mod.root_src_directory, - global_cache_directory, - local_cache_directory, - dependencies_source, - error_bundle, - all_modules, - root_prog_node, - dep.hash.?, - ); - } + try mod.fetchAndAddDependencies( + deps_pkg, + arena, + thread_pool, + http_client, + mod.root_src_directory, + global_cache_directory, + local_cache_directory, + dependencies_source, + error_bundle, + all_modules, + root_prog_node, + dep.hash.?, + ); - try pkg.add(gpa, name, mod); - if (deps_pkg.table.get(dep.hash.?)) |other_sub| { - // This should be the same package (and hence module) since it's the same hash - // TODO: dedup multiple versions of the same package - assert(other_sub == mod); - } else { - try deps_pkg.add(gpa, dep.hash.?, mod); - } - } else if (!sub.found_existing) { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ (dep.hash.?)[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); - try dependencies_source.writer().print( - \\ pub const {} = struct {{ - \\ pub const build_root = "{}"; - \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; - \\ }}; - \\ - , .{ - std.zig.fmtId(dep.hash.?), 
- std.zig.fmtEscapes(build_root), - }); + break :m mod; + }; + + try pkg.add(gpa, name, sub_pkg); + if (deps_pkg.table.get(dep.hash.?)) |other_sub| { + // This should be the same package (and hence module) since it's the same hash + // TODO: dedup multiple versions of the same package + assert(other_sub == sub_pkg); + } else { + try deps_pkg.add(gpa, dep.hash.?, sub_pkg); } } @@ -490,6 +485,316 @@ const Report = struct { } }; +const FetchLocation = union(SourceType) { + /// The absolute path to a file or directory. + /// This may be a file that requires unpacking (such as a .tar.gz), + /// or the path to the root directory of a package. + file: []const u8, + http_request: std.Uri, + + pub fn init(gpa: Allocator, uri: std.Uri, directory: Compilation.Directory, dep: Manifest.Dependency, report: Report) !FetchLocation { + const source_type = getPackageSourceType(uri) catch + return report.fail(dep.location_tok, "Unknown scheme: {s}", .{uri.scheme}); + + return switch (source_type) { + .file => f: { + const path = if (builtin.os.tag == .windows) p: { + var uri_str = std.ArrayList(u8).init(gpa); + defer uri_str.deinit(); + try uri.format("+/", .{}, uri_str.writer()); + const uri_str_z = try gpa.dupeZ(u8, uri_str.items); + defer gpa.free(uri_str_z); + + var buf: [std.os.windows.MAX_PATH:0]u8 = undefined; + var buf_len: std.os.windows.DWORD = std.os.windows.MAX_PATH; + const result = std.os.windows.shlwapi.PathCreateFromUrlA(uri_str_z, &buf, &buf_len, 0); + + if (result != std.os.windows.S_OK) return report.fail(dep.location_tok, "Invalid URI", .{}); + + break :p try gpa.dupe(u8, buf[0..buf_len]); + } else try std.Uri.unescapeString(gpa, uri.path); + defer gpa.free(path); + + const new_path = try fs.path.resolve(gpa, &.{ directory.path.?, path }); + + break :f .{ .file = new_path }; + }, + .http_request => r: { + break :r .{ .http_request = uri }; + }, + }; + } + + pub fn deinit(f: *FetchLocation, gpa: Allocator) void { + switch (f.*) { + .file => |path| gpa.free(path), + 
.http_request => {}, + } + f.* = undefined; + } + + const SourceType = enum { + file, + http_request, + }; + + fn getPackageSourceType(uri: std.Uri) error{UnknownScheme}!SourceType { + const package_source_map = std.ComptimeStringMap( + SourceType, + .{ + .{ "file", .file }, + .{ "http", .http_request }, + .{ "https", .http_request }, + }, + ); + return package_source_map.get(uri.scheme) orelse error.UnknownScheme; + } + + pub fn isDirectory(path: []const u8, root_dir: Compilation.Directory) !bool { + return if (mem.endsWith(u8, path, std.fs.path.sep_str)) + true + else if (std.fs.path.extension(path).len > 0) + false + else d: { + // It's common to write directories without a trailing '/'. + // This is some special casing logic to detect directories if + // the file type cannot be determined from the extension. + var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) { + error.NotDir => break :d false, + else => break :d err, + }; + defer dir.close(); + break :d true; + }; + } + + pub fn fetch( + f: FetchLocation, + gpa: Allocator, + root_dir: Compilation.Directory, + http_client: *std.http.Client, + dep: Manifest.Dependency, + report: Report, + ) !ReadableResource { + switch (f) { + .file => |file| { + const is_dir = isDirectory(file, root_dir) catch + return report.fail(dep.location_tok, "File not found: {s}", .{file}); + + return if (is_dir) + .{ + .path = try gpa.dupe(u8, file), + .resource = .{ .directory = try fs.openIterableDirAbsolute(file, .{}) }, + } + else + .{ + .path = try gpa.dupe(u8, file), + .resource = .{ .file = try fs.openFileAbsolute(file, .{}) }, + }; + }, + .http_request => |uri| { + var h = std.http.Headers{ .allocator = gpa }; + defer h.deinit(); + + var req = try http_client.request(.GET, uri, h, .{}); + + try req.start(.{}); + try req.wait(); + + if (req.response.status != .ok) { + return report.fail(dep.location_tok, "Expected response status '200 OK' got '{} {s}'", .{ + @intFromEnum(req.response.status), + 
req.response.status.phrase() orelse "", + }); + } + + return .{ + .path = try gpa.dupe(u8, uri.path), + .resource = .{ .http_request = req }, + }; + }, + } + } +}; + +const ReadableResource = struct { + path: []const u8, + resource: union(enum) { + file: fs.File, + directory: fs.IterableDir, + http_request: std.http.Client.Request, + }, + + /// Unpack the package into the global cache directory. + /// If `ps` does not require unpacking (for example, if it is a directory), then no caching is performed. + /// In either case, the hash is computed and returned along with the path to the package. + pub fn unpack( + rr: *ReadableResource, + allocator: Allocator, + thread_pool: *ThreadPool, + global_cache_directory: Compilation.Directory, + dep: Manifest.Dependency, + report: Report, + pkg_prog_node: *std.Progress.Node, + ) !PackageLocation { + switch (rr.resource) { + .directory => |dir| { + const actual_hash = try computePackageHash(thread_pool, dir); + return .{ + .hash = actual_hash, + .dir_path = try allocator.dupe(u8, rr.path), + }; + }, + inline .file, .http_request => |*r| { + const s = fs.path.sep_str; + const rand_int = std.crypto.random.int(u64); + const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); + + const actual_hash = h: { + var tmp_directory: Compilation.Directory = d: { + const path = try global_cache_directory.join(allocator, &.{tmp_dir_sub_path}); + errdefer allocator.free(path); + + const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}); + errdefer iterable_dir.close(); + + break :d .{ + .path = path, + .handle = iterable_dir.dir, + }; + }; + defer tmp_directory.closeAndFree(allocator); + + const opt_content_length = try rr.getSize(); + + var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{ + .child_reader = r.reader(), + .prog_node = pkg_prog_node, + .unit = if (opt_content_length) |content_length| unit: { + const kib = content_length / 1024; + const mib = kib / 1024; + if (mib > 0) { + 
pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); + pkg_prog_node.setUnit("MiB"); + break :unit .mib; + } else { + pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); + pkg_prog_node.setUnit("KiB"); + break :unit .kib; + } + } else .any, + }; + pkg_prog_node.context.refresh(); + + switch (try rr.getFileType(dep, report)) { + .@"tar.gz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.gzip), + // I have not checked what buffer sizes the xz decompression implementation uses + // by default, so the same logic applies for buffering the reader as for gzip. + .@"tar.xz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.xz), + } + + // Unpack completed - stop showing amount as progress + pkg_prog_node.setEstimatedTotalItems(0); + pkg_prog_node.setCompletedItems(0); + pkg_prog_node.context.refresh(); + + // TODO: delete files not included in the package prior to computing the package hash. + // for example, if the ini file has directives to include/not include certain files, + // apply those rules directly to the filesystem right here. This ensures that files + // not protected by the hash are not present on the file system. 
+ + break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); + }; + + const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); + const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path}); + errdefer allocator.free(unpacked_path); + + const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path); + defer allocator.free(relative_unpacked_path); + try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path); + + return .{ + .hash = actual_hash, + .dir_path = unpacked_path, + }; + }, + } + } + + const FileType = enum { + @"tar.gz", + @"tar.xz", + }; + + pub fn getSize(rr: ReadableResource) !?u64 { + switch (rr.resource) { + // TODO: Handle case of chunked content-length + .http_request => |req| return req.response.content_length, + .file => |f| return (try f.metadata()).size(), + .directory => unreachable, + } + } + + pub fn getFileType(rr: ReadableResource, dep: Manifest.Dependency, report: Report) !FileType { + switch (rr.resource) { + .file => { + return if (mem.endsWith(u8, rr.path, ".tar.gz")) + .@"tar.gz" + else if (mem.endsWith(u8, rr.path, ".tar.xz")) + .@"tar.xz" + else + return report.fail(dep.location_tok, "Unknown file type", .{}); + }, + .directory => return error.IsDir, + .http_request => |req| { + const content_type = req.response.headers.getFirstValue("Content-Type") orelse + return report.fail(dep.location_tok, "Missing 'Content-Type' header", .{}); + + // If the response has a different content type than the URI indicates, override + // the previously assumed file type. 
+ return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or + ascii.eqlIgnoreCase(content_type, "application/x-gzip") or + ascii.eqlIgnoreCase(content_type, "application/tar+gzip")) + .@"tar.gz" + else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) + .@"tar.xz" + else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) ty: { + // support gitlab tarball urls such as https://gitlab.com///-/archive//-.tar.gz + // whose content-disposition header is: 'attachment; filename="-.tar.gz"' + const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse + return report.fail(dep.location_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); + if (mem.startsWith(u8, content_disposition, "attachment;") and + mem.endsWith(u8, content_disposition, ".tar.gz\"")) + { + break :ty .@"tar.gz"; + } else return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); + } else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type}); + }, + } + } + + pub fn deinit(rr: *ReadableResource, gpa: Allocator) void { + gpa.free(rr.path); + switch (rr.resource) { + .file => |file| file.close(), + .directory => |*dir| dir.close(), + .http_request => |*req| req.deinit(), + } + rr.* = undefined; + } +}; + +pub const PackageLocation = struct { + hash: [Manifest.Hash.digest_length]u8, + dir_path: []const u8, + + pub fn deinit(pl: *PackageLocation, allocator: Allocator) void { + allocator.free(pl.dir_path); + pl.* = undefined; + } +}; + const hex_multihash_len = 2 * Manifest.multihash_len; const MultiHashHexDigest = [hex_multihash_len]u8; /// This is to avoid creating multiple modules for the same build.zig file. 
@@ -542,29 +847,24 @@ fn ProgressReader(comptime ReaderType: type) type { }; } -fn fetchAndUnpack( - thread_pool: *ThreadPool, - http_client: *std.http.Client, +fn getCachedPackage( + gpa: Allocator, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, report: Report, all_modules: *AllModules, root_prog_node: *std.Progress.Node, - /// This does not have to be any form of canonical or fully-qualified name: it - /// is only intended to be human-readable for progress reporting. - name_for_prog: []const u8, -) !struct { mod: ?*Package, found_existing: bool } { - const gpa = http_client.allocator; +) !?*Package { + _ = report; const s = fs.path.sep_str; - // Check if the expected_hash is already present in the global package // cache, and thereby avoid both fetching and unpacking. - if (dep.hash) |h| cached: { + if (dep.hash) |h| { const hex_digest = h[0..hex_multihash_len]; const pkg_dir_sub_path = "p" ++ s ++ hex_digest; var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) { - error.FileNotFound => break :cached, + error.FileNotFound => return null, else => |e| return e, }; errdefer pkg_dir.close(); @@ -574,16 +874,7 @@ fn fetchAndUnpack( const gop = try all_modules.getOrPut(gpa, hex_digest.*); if (gop.found_existing) { if (gop.value_ptr.*) |mod| { - return switch (mod) { - .zig_pkg => |pkg| .{ - .mod = pkg, - .found_existing = true, - }, - .non_zig_pkg => .{ - .mod = null, - .found_existing = true, - }, - }; + return mod; } } @@ -615,121 +906,60 @@ fn fetchAndUnpack( .root_src_path = owned_src_path, }; - gop.value_ptr.* = .{ .zig_pkg = ptr }; - return .{ - .mod = ptr, - .found_existing = false, - }; + gop.value_ptr.* = ptr; + return ptr; } + return null; +} + +fn fetchAndUnpack( + thread_pool: *ThreadPool, + http_client: *std.http.Client, + directory: Compilation.Directory, + global_cache_directory: Compilation.Directory, + dep: Manifest.Dependency, + report: Report, + all_modules: *AllModules, + 
root_prog_node: *std.Progress.Node, + /// This does not have to be any form of canonical or fully-qualified name: it + /// is only intended to be human-readable for progress reporting. + name_for_prog: []const u8, +) !*Package { + const gpa = http_client.allocator; + var pkg_prog_node = root_prog_node.start(name_for_prog, 0); defer pkg_prog_node.end(); pkg_prog_node.activate(); pkg_prog_node.context.refresh(); - const uri = try std.Uri.parse(dep.url); - - const rand_int = std.crypto.random.int(u64); - const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); - - const actual_hash = a: { - var tmp_directory: Compilation.Directory = d: { - const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path}); - errdefer gpa.free(path); - - const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}); - errdefer iterable_dir.close(); - - break :d .{ - .path = path, - .handle = iterable_dir.dir, - }; - }; - defer tmp_directory.closeAndFree(gpa); - - var h = std.http.Headers{ .allocator = gpa }; - defer h.deinit(); - - var req = try http_client.request(.GET, uri, h, .{}); - defer req.deinit(); - - try req.start(.{}); - try req.wait(); - - if (req.response.status != .ok) { - return report.fail(dep.url_tok, "Expected response status '200 OK' got '{} {s}'", .{ - @intFromEnum(req.response.status), - req.response.status.phrase() orelse "", - }); - } - - const content_type = req.response.headers.getFirstValue("Content-Type") orelse - return report.fail(dep.url_tok, "Missing 'Content-Type' header", .{}); - - var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{ - .child_reader = req.reader(), - .prog_node = &pkg_prog_node, - .unit = if (req.response.content_length) |content_length| unit: { - const kib = content_length / 1024; - const mib = kib / 1024; - if (mib > 0) { - pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); - pkg_prog_node.setUnit("MiB"); - break :unit .mib; - } else { - 
pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); - pkg_prog_node.setUnit("KiB"); - break :unit .kib; - } - } else .any, - }; - pkg_prog_node.context.refresh(); - - if (ascii.eqlIgnoreCase(content_type, "application/gzip") or - ascii.eqlIgnoreCase(content_type, "application/x-gzip") or - ascii.eqlIgnoreCase(content_type, "application/tar+gzip")) - { - // I observed the gzip stream to read 1 byte at a time, so I am using a - // buffered reader on the front of it. - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip); - } else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) { - // I have not checked what buffer sizes the xz decompression implementation uses - // by default, so the same logic applies for buffering the reader as for gzip. - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz); - } else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) { - // support gitlab tarball urls such as https://gitlab.com///-/archive//-.tar.gz - // whose content-disposition header is: 'attachment; filename="-.tar.gz"' - const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse - return report.fail(dep.url_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); - if (isTarAttachment(content_disposition)) { - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip); - } else return report.fail(dep.url_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); - } else { - return report.fail(dep.url_tok, "Unsupported 'Content-Type' header value: '{s}'", .{content_type}); - } - - // Download completed - stop showing downloaded amount as progress - pkg_prog_node.setEstimatedTotalItems(0); - pkg_prog_node.setCompletedItems(0); - pkg_prog_node.context.refresh(); - - // TODO: delete files not included in the package 
prior to computing the package hash. - // for example, if the ini file has directives to include/not include certain files, - // apply those rules directly to the filesystem right here. This ensures that files - // not protected by the hash are not present on the file system. + const uri = switch (dep.location) { + .url => |url| std.Uri.parse(url) catch |err| switch (err) { + error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI.", .{}), + else => return err, + }, + .path => |path| std.Uri{ + .scheme = "file", + .user = null, + .password = null, + .host = null, + .port = null, + .path = path, + .query = null, + .fragment = null, + }, + }; - // TODO: raise an error for files that have illegal paths on some operating systems. - // For example, on Linux a path with a backslash should raise an error here. - // Of course, if the ignore rules above omit the file from the package, then everything - // is fine and no error should be raised. 
+ var fetch_location = try FetchLocation.init(gpa, uri, directory, dep, report); + defer fetch_location.deinit(gpa); - break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); - }; + var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); + defer readable_resource.deinit(gpa); - const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); - try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path); + var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node); + defer package_location.deinit(gpa); - const actual_hex = Manifest.hexDigest(actual_hash); + const actual_hex = Manifest.hexDigest(package_location.hash); if (dep.hash) |h| { if (!mem.eql(u8, h, &actual_hex)) { return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ @@ -743,9 +973,9 @@ fn fetchAndUnpack( const eb = report.error_bundle; const notes_len = 1; try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ - .tok = dep.url_tok, + .tok = dep.location_tok, .off = 0, - .msg = "url field is missing corresponding hash field", + .msg = "dependency is missing hash field", }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ @@ -754,35 +984,24 @@ fn fetchAndUnpack( return error.PackageFetchFailed; } - const build_zig_path = try std.fs.path.join(gpa, &.{ pkg_dir_sub_path, build_zig_basename }); - defer gpa.free(build_zig_path); - - global_cache_directory.handle.access(build_zig_path, .{}) catch |err| switch (err) { - error.FileNotFound => { - try all_modules.put(gpa, actual_hex, .non_zig_pkg); - return .{ - .mod = null, - .found_existing = false, - }; - }, - else => return err, - }; + const gop = try all_modules.getOrPut(gpa, actual_hex); - const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, build_zig_basename); - try 
all_modules.put(gpa, actual_hex, .{ .zig_pkg = mod }); - return .{ - .mod = mod, - .found_existing = false, - }; + if (gop.found_existing and gop.value_ptr.* != null) { + return gop.value_ptr.*.?; + } else { + const module = try create(gpa, package_location.dir_path, build_zig_basename); + gop.value_ptr.* = module; + return module; + } } fn unpackTarball( gpa: Allocator, - req_reader: anytype, + reader: anytype, out_dir: fs.Dir, comptime compression: type, ) !void { - var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req_reader); + var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var decompress = try compression.decompress(gpa, br.reader()); defer decompress.deinit(); From 2f0e5b00b086981eafc0f9d2be573943495158bf Mon Sep 17 00:00:00 2001 From: Adam Goertz Date: Sun, 24 Sep 2023 01:24:49 +0000 Subject: [PATCH 2/5] Allow only relative paths. This commit makes the following changes: * Disallow file:/// URIs * Allow only relative paths in the .path field of build.zig.zon * Remove now-unneeded shlwapi dependency --- build.zig | 2 - lib/std/Uri.zig | 2 +- lib/std/os/windows.zig | 1 - lib/std/os/windows/shlwapi.zig | 13 - src/Package.zig | 479 +++++++++++++++++---------------- 5 files changed, 249 insertions(+), 248 deletions(-) delete mode 100644 lib/std/os/windows/shlwapi.zig diff --git a/build.zig b/build.zig index c4e9d1937304..8777060a89df 100644 --- a/build.zig +++ b/build.zig @@ -336,7 +336,6 @@ pub fn build(b: *std.Build) !void { artifact.linkSystemLibrary("version"); artifact.linkSystemLibrary("uuid"); artifact.linkSystemLibrary("ole32"); - artifact.linkSystemLibrary("shlwapi"); } } } @@ -713,7 +712,6 @@ fn addStaticLlvmOptionsToExe(exe: *std.Build.Step.Compile) !void { exe.linkSystemLibrary("version"); exe.linkSystemLibrary("uuid"); exe.linkSystemLibrary("ole32"); - exe.linkSystemLibrary("shlwapi"); } } diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig index e2c23e5a1dc5..83e7da76494b 100644 --- 
a/lib/std/Uri.zig +++ b/lib/std/Uri.zig @@ -150,7 +150,7 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { std.debug.assert(reader.get().? == '/'); std.debug.assert(reader.get().? == '/'); - var authority = reader.readUntil(isAuthoritySeparator); + const authority = reader.readUntil(isAuthoritySeparator); if (authority.len == 0) { if (reader.peekPrefix("/")) break :a else return error.InvalidFormat; } diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 3522f238ecec..d40fee8db241 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -30,7 +30,6 @@ pub const gdi32 = @import("windows/gdi32.zig"); pub const winmm = @import("windows/winmm.zig"); pub const crypt32 = @import("windows/crypt32.zig"); pub const nls = @import("windows/nls.zig"); -pub const shlwapi = @import("windows/shlwapi.zig"); pub const self_process_handle = @as(HANDLE, @ptrFromInt(maxInt(usize))); diff --git a/lib/std/os/windows/shlwapi.zig b/lib/std/os/windows/shlwapi.zig deleted file mode 100644 index 0f0ceed57605..000000000000 --- a/lib/std/os/windows/shlwapi.zig +++ /dev/null @@ -1,13 +0,0 @@ -const std = @import("../../std.zig"); -const windows = std.os.windows; - -const DWORD = windows.DWORD; -const WINAPI = windows.WINAPI; -const HRESULT = windows.HRESULT; -const LPCSTR = windows.LPCSTR; -const LPSTR = windows.LPSTR; -const LPWSTR = windows.LPWSTR; -const LPCWSTR = windows.LPCWSTR; - -pub extern "shlwapi" fn PathCreateFromUrlW(pszUrl: LPCWSTR, pszPath: LPWSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT; -pub extern "shlwapi" fn PathCreateFromUrlA(pszUrl: LPCSTR, pszPath: LPSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT; diff --git a/src/Package.zig b/src/Package.zig index 2429afd07371..b8b95a4f5139 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -245,8 +245,6 @@ pub fn fetchAndAddDependencies( error.FileNotFound => { // Handle the same as no dependencies. 
if (this_hash) |hash| { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ hash[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); try dependencies_source.writer().print( \\ pub const {} = struct {{ \\ pub const build_root = "{}"; @@ -256,7 +254,7 @@ pub fn fetchAndAddDependencies( \\ , .{ std.zig.fmtId(hash), - std.zig.fmtEscapes(build_root), + std.zig.fmtEscapes(pkg.root_src_directory.path.?), std.zig.fmtEscapes(hash), }); } else { @@ -312,19 +310,15 @@ pub fn fetchAndAddDependencies( try dependencies_source.writer().writeAll("pub const packages = struct {\n"); } - const deps_list = manifest.dependencies.values(); - for (manifest.dependencies.keys(), 0..) |name, i| { - const dep = deps_list[i]; - - const sub_pkg = try getCachedPackage( - http_client.allocator, + for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, *dep| { + const sub_mod, const found_existing = try getCachedPackage( + arena, global_cache_directory, - dep, - report, + dep.*, all_modules, root_prog_node, - ) orelse m: { - const mod = try fetchAndUnpack( + ) orelse .{ + try fetchAndUnpack( thread_pool, http_client, directory, @@ -334,39 +328,58 @@ pub fn fetchAndAddDependencies( all_modules, root_prog_node, name, - ); - - try mod.fetchAndAddDependencies( - deps_pkg, - arena, - thread_pool, - http_client, - mod.root_src_directory, - global_cache_directory, - local_cache_directory, - dependencies_source, - error_bundle, - all_modules, - root_prog_node, - dep.hash.?, - ); - - break :m mod; + ), + false, }; - try pkg.add(gpa, name, sub_pkg); - if (deps_pkg.table.get(dep.hash.?)) |other_sub| { - // This should be the same package (and hence module) since it's the same hash - // TODO: dedup multiple versions of the same package - assert(other_sub == sub_pkg); - } else { - try deps_pkg.add(gpa, dep.hash.?, sub_pkg); + assert(dep.hash != null); + + switch (sub_mod) { + .zig_pkg => |sub_pkg| { + if (!found_existing) { + try 
sub_pkg.fetchAndAddDependencies( + deps_pkg, + arena, + thread_pool, + http_client, + sub_pkg.root_src_directory, + global_cache_directory, + local_cache_directory, + dependencies_source, + error_bundle, + all_modules, + root_prog_node, + dep.hash.?, + ); + } + + try pkg.add(gpa, name, sub_pkg); + if (deps_pkg.table.get(dep.hash.?)) |other_sub| { + // This should be the same package (and hence module) since it's the same hash + // TODO: dedup multiple versions of the same package + assert(other_sub == sub_pkg); + } else { + try deps_pkg.add(gpa, dep.hash.?, sub_pkg); + } + }, + .non_zig_pkg => |sub_pkg| { + if (!found_existing) { + try dependencies_source.writer().print( + \\ pub const {} = struct {{ + \\ pub const build_root = "{}"; + \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; + \\ }}; + \\ + , .{ + std.zig.fmtId(dep.hash.?), + std.zig.fmtEscapes(sub_pkg.root_src_directory.path.?), + }); + } + }, } } if (this_hash) |hash| { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ hash[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); try dependencies_source.writer().print( \\ pub const {} = struct {{ \\ pub const build_root = "{}"; @@ -375,7 +388,7 @@ pub fn fetchAndAddDependencies( \\ , .{ std.zig.fmtId(hash), - std.zig.fmtEscapes(build_root), + std.zig.fmtEscapes(pkg.root_src_directory.path.?), std.zig.fmtEscapes(hash), }); for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| { @@ -485,44 +498,40 @@ const Report = struct { } }; -const FetchLocation = union(SourceType) { +const FetchLocation = union(enum) { /// The absolute path to a file or directory. /// This may be a file that requires unpacking (such as a .tar.gz), /// or the path to the root directory of a package. 
file: []const u8, http_request: std.Uri, - pub fn init(gpa: Allocator, uri: std.Uri, directory: Compilation.Directory, dep: Manifest.Dependency, report: Report) !FetchLocation { - const source_type = getPackageSourceType(uri) catch - return report.fail(dep.location_tok, "Unknown scheme: {s}", .{uri.scheme}); - - return switch (source_type) { - .file => f: { - const path = if (builtin.os.tag == .windows) p: { - var uri_str = std.ArrayList(u8).init(gpa); - defer uri_str.deinit(); - try uri.format("+/", .{}, uri_str.writer()); - const uri_str_z = try gpa.dupeZ(u8, uri_str.items); - defer gpa.free(uri_str_z); - - var buf: [std.os.windows.MAX_PATH:0]u8 = undefined; - var buf_len: std.os.windows.DWORD = std.os.windows.MAX_PATH; - const result = std.os.windows.shlwapi.PathCreateFromUrlA(uri_str_z, &buf, &buf_len, 0); - - if (result != std.os.windows.S_OK) return report.fail(dep.location_tok, "Invalid URI", .{}); - - break :p try gpa.dupe(u8, buf[0..buf_len]); - } else try std.Uri.unescapeString(gpa, uri.path); - defer gpa.free(path); + pub fn init(gpa: Allocator, directory: Compilation.Directory, dep: Manifest.Dependency, report: Report) !FetchLocation { + switch (dep.location) { + .url => |url| { + const uri = std.Uri.parse(url) catch |err| switch (err) { + error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI", .{}), + else => return err, + }; + if (ascii.eqlIgnoreCase(uri.scheme, "file")) { + return report.fail(dep.location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{}); + } + return .{ .http_request = uri }; + }, + .path => |path| { + const unescaped = try std.Uri.unescapeString(gpa, path); + defer gpa.free(unescaped); + const unnormalized_path = try unnormalizePath(gpa, unescaped); + defer gpa.free(unnormalized_path); + + if (fs.path.isAbsolute(unnormalized_path)) { + return report.fail(dep.location_tok, "Absolute paths are not allowed. 
Use a relative path instead", .{}); + } - const new_path = try fs.path.resolve(gpa, &.{ directory.path.?, path }); + const new_path = try fs.path.resolve(gpa, &.{ directory.path.?, unnormalized_path }); - break :f .{ .file = new_path }; + return .{ .file = new_path }; }, - .http_request => r: { - break :r .{ .http_request = uri }; - }, - }; + } } pub fn deinit(f: *FetchLocation, gpa: Allocator) void { @@ -533,41 +542,6 @@ const FetchLocation = union(SourceType) { f.* = undefined; } - const SourceType = enum { - file, - http_request, - }; - - fn getPackageSourceType(uri: std.Uri) error{UnknownScheme}!SourceType { - const package_source_map = std.ComptimeStringMap( - SourceType, - .{ - .{ "file", .file }, - .{ "http", .http_request }, - .{ "https", .http_request }, - }, - ); - return package_source_map.get(uri.scheme) orelse error.UnknownScheme; - } - - pub fn isDirectory(path: []const u8, root_dir: Compilation.Directory) !bool { - return if (mem.endsWith(u8, path, std.fs.path.sep_str)) - true - else if (std.fs.path.extension(path).len > 0) - false - else d: { - // It's common to write directories without a trailing '/'. - // This is some special casing logic to detect directories if - // the file type cannot be determined from the extension. 
- var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) { - error.NotDir => break :d false, - else => break :d err, - }; - defer dir.close(); - break :d true; - }; - } - pub fn fetch( f: FetchLocation, gpa: Allocator, @@ -578,25 +552,28 @@ const FetchLocation = union(SourceType) { ) !ReadableResource { switch (f) { .file => |file| { - const is_dir = isDirectory(file, root_dir) catch - return report.fail(dep.location_tok, "File not found: {s}", .{file}); + const is_dir = isDirectory(root_dir, file) catch |err| switch (err) { + error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{file}), + else => return err, + }; - return if (is_dir) - .{ - .path = try gpa.dupe(u8, file), - .resource = .{ .directory = try fs.openIterableDirAbsolute(file, .{}) }, - } - else - .{ - .path = try gpa.dupe(u8, file), - .resource = .{ .file = try fs.openFileAbsolute(file, .{}) }, - }; + const owned_path = try gpa.dupe(u8, file); + errdefer gpa.free(owned_path); + + return .{ + .path = owned_path, + .resource = if (is_dir) + .{ .directory = try fs.openIterableDirAbsolute(file, .{}) } + else + .{ .file = try fs.openFileAbsolute(file, .{}) }, + }; }, .http_request => |uri| { var h = std.http.Headers{ .allocator = gpa }; defer h.deinit(); var req = try http_client.request(.GET, uri, h, .{}); + errdefer req.deinit(); try req.start(.{}); try req.wait(); @@ -638,10 +615,9 @@ const ReadableResource = struct { pkg_prog_node: *std.Progress.Node, ) !PackageLocation { switch (rr.resource) { - .directory => |dir| { - const actual_hash = try computePackageHash(thread_pool, dir); + .directory => { return .{ - .hash = actual_hash, + .hash = computePathHash(rr.path), .dir_path = try allocator.dupe(u8, rr.path), }; }, @@ -739,11 +715,7 @@ const ReadableResource = struct { pub fn getFileType(rr: ReadableResource, dep: Manifest.Dependency, report: Report) !FileType { switch (rr.resource) { .file => { - return if (mem.endsWith(u8, rr.path, ".tar.gz")) - .@"tar.gz" 
- else if (mem.endsWith(u8, rr.path, ".tar.xz")) - .@"tar.xz" - else + return fileTypeFromPath(rr.path) orelse return report.fail(dep.location_tok, "Unknown file type", .{}); }, .directory => return error.IsDir, @@ -764,16 +736,40 @@ const ReadableResource = struct { // whose content-disposition header is: 'attachment; filename="-.tar.gz"' const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse return report.fail(dep.location_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); - if (mem.startsWith(u8, content_disposition, "attachment;") and - mem.endsWith(u8, content_disposition, ".tar.gz\"")) - { - break :ty .@"tar.gz"; - } else return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); + break :ty getAttachmentType(content_disposition) orelse + return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); } else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type}); }, } } + fn fileTypeFromPath(file_path: []const u8) ?FileType { + return if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) + .@"tar.gz" + else if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) + .@"tar.xz" + else + null; + } + + fn getAttachmentType(content_disposition: []const u8) ?FileType { + const disposition_type_end = ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return null; + + var value_start = ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return null; + value_start += "filename".len; + if (content_disposition[value_start] == '*') { + value_start += 1; + } + if (content_disposition[value_start] != '=') return null; + value_start += 1; + + var value_end = mem.indexOfPos(u8, content_disposition, 
value_start, ";") orelse content_disposition.len; + if (content_disposition[value_end - 1] == '\"') { + value_end -= 1; + } + return fileTypeFromPath(content_disposition[value_start..value_end]); + } + pub fn deinit(rr: *ReadableResource, gpa: Allocator) void { gpa.free(rr.path); switch (rr.resource) { @@ -786,6 +782,8 @@ const ReadableResource = struct { }; pub const PackageLocation = struct { + /// For packages that require unpacking, this is the hash of the package contents. + /// For directories, this is the hash of the absolute file path. hash: [Manifest.Hash.digest_length]u8, dir_path: []const u8, @@ -797,13 +795,15 @@ pub const PackageLocation = struct { const hex_multihash_len = 2 * Manifest.multihash_len; const MultiHashHexDigest = [hex_multihash_len]u8; + +const DependencyModule = union(enum) { + zig_pkg: *Package, + non_zig_pkg: *Package, +}; /// This is to avoid creating multiple modules for the same build.zig file. /// If the value is `null`, the package is a known dependency, but has not yet /// been fetched. -pub const AllModules = std.AutoHashMapUnmanaged(MultiHashHexDigest, ?union(enum) { - zig_pkg: *Package, - non_zig_pkg: void, -}); +pub const AllModules = std.AutoHashMapUnmanaged(MultiHashHexDigest, ?DependencyModule); fn ProgressReader(comptime ReaderType: type) type { return struct { @@ -847,15 +847,18 @@ fn ProgressReader(comptime ReaderType: type) type { }; } +/// Get a cached package if it exists. +/// Returns `null` if the package has not been cached +/// If the package exists in the cache, returns a pointer to the package and a +/// boolean indicating whether this package has already been seen in the build +/// (i.e. whether or not its transitive dependencies have been fetched). 
fn getCachedPackage( gpa: Allocator, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, - report: Report, all_modules: *AllModules, root_prog_node: *std.Progress.Node, -) !?*Package { - _ = report; +) !?struct { DependencyModule, bool } { const s = fs.path.sep_str; // Check if the expected_hash is already present in the global package // cache, and thereby avoid both fetching and unpacking. @@ -874,27 +877,21 @@ fn getCachedPackage( const gop = try all_modules.getOrPut(gpa, hex_digest.*); if (gop.found_existing) { if (gop.value_ptr.*) |mod| { - return mod; + return .{ mod, true }; } } - pkg_dir.access(build_zig_basename, .{}) catch { - gop.value_ptr.* = .non_zig_pkg; - return .{ - .mod = null, - .found_existing = false, - }; - }; + root_prog_node.completeOne(); + + const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path}); errdefer gpa.free(build_root); - root_prog_node.completeOne(); - const ptr = try gpa.create(Package); errdefer gpa.destroy(ptr); - const owned_src_path = try gpa.dupe(u8, build_zig_basename); + const owned_src_path = if (is_zig_mod) try gpa.dupe(u8, build_zig_basename) else ""; errdefer gpa.free(owned_src_path); ptr.* = .{ @@ -906,8 +903,12 @@ fn getCachedPackage( .root_src_path = owned_src_path, }; - gop.value_ptr.* = ptr; - return ptr; + gop.value_ptr.* = if (is_zig_mod) + .{ .zig_pkg = ptr } + else + .{ .non_zig_pkg = ptr }; + + return .{ gop.value_ptr.*.?, false }; } return null; @@ -918,14 +919,14 @@ fn fetchAndUnpack( http_client: *std.http.Client, directory: Compilation.Directory, global_cache_directory: Compilation.Directory, - dep: Manifest.Dependency, + dep: *Manifest.Dependency, report: Report, all_modules: *AllModules, root_prog_node: *std.Progress.Node, /// This does not have to be any form of canonical or fully-qualified name: it /// is only intended to be human-readable for progress reporting. 
name_for_prog: []const u8, -) !*Package { +) !DependencyModule { const gpa = http_client.allocator; var pkg_prog_node = root_prog_node.start(name_for_prog, 0); @@ -933,66 +934,65 @@ fn fetchAndUnpack( pkg_prog_node.activate(); pkg_prog_node.context.refresh(); - const uri = switch (dep.location) { - .url => |url| std.Uri.parse(url) catch |err| switch (err) { - error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI.", .{}), - else => return err, - }, - .path => |path| std.Uri{ - .scheme = "file", - .user = null, - .password = null, - .host = null, - .port = null, - .path = path, - .query = null, - .fragment = null, - }, - }; - - var fetch_location = try FetchLocation.init(gpa, uri, directory, dep, report); + var fetch_location = try FetchLocation.init(gpa, directory, dep.*, report); defer fetch_location.deinit(gpa); - var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); + var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep.*, report); defer readable_resource.deinit(gpa); - var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node); + var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep.*, report, &pkg_prog_node); defer package_location.deinit(gpa); const actual_hex = Manifest.hexDigest(package_location.hash); - if (dep.hash) |h| { - if (!mem.eql(u8, h, &actual_hex)) { - return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ - h, actual_hex, + if (readable_resource.resource != .directory) { + if (dep.hash) |h| { + if (!mem.eql(u8, h, &actual_hex)) { + return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ + h, actual_hex, + }); + } + } else { + const file_path = try report.directory.join(gpa, &.{Manifest.basename}); + defer gpa.free(file_path); + + const eb = report.error_bundle; + const 
notes_len = 1; + try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ + .tok = dep.location_tok, + .off = 0, + .msg = "dependency is missing hash field", }); + const notes_start = try eb.reserveNotes(notes_len); + eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), + })); + return error.PackageFetchFailed; } } else { - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - - const eb = report.error_bundle; - const notes_len = 1; - try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ - .tok = dep.location_tok, - .off = 0, - .msg = "dependency is missing hash field", - }); - const notes_start = try eb.reserveNotes(notes_len); - eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), - })); - return error.PackageFetchFailed; + if (dep.hash != null) { + return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); + } + // Since directory dependencies don't provide a hash in build.zig.zon, + // set the hash here to be the hash of the absolute path to the dependency. 
+ dep.hash = try gpa.dupe(u8, &actual_hex); } - const gop = try all_modules.getOrPut(gpa, actual_hex); + const build_zig_path = try std.fs.path.join(gpa, &.{ package_location.dir_path, build_zig_basename }); + defer gpa.free(build_zig_path); + assert(fs.path.isAbsolute(build_zig_path)); - if (gop.found_existing and gop.value_ptr.* != null) { - return gop.value_ptr.*.?; - } else { - const module = try create(gpa, package_location.dir_path, build_zig_basename); - gop.value_ptr.* = module; - return module; - } + global_cache_directory.handle.access(build_zig_path, .{}) catch |err| switch (err) { + error.FileNotFound => { + const module = try create(gpa, package_location.dir_path, ""); + try all_modules.put(gpa, actual_hex, .{ .non_zig_pkg = module }); + return .{ .non_zig_pkg = module }; + }, + else => return err, + }; + + const module = try create(gpa, package_location.dir_path, build_zig_basename); + try all_modules.put(gpa, actual_hex, .{ .zig_pkg = module }); + return .{ .zig_pkg = module }; } fn unpackTarball( @@ -1092,6 +1092,22 @@ fn computePackageHash( return hasher.finalResult(); } +/// Compute the hash of a file path. +fn computePathHash(path: []const u8) [Manifest.Hash.digest_length]u8 { + var hasher = Manifest.Hash.init(.{}); + hasher.update(path); + return hasher.finalResult(); +} + +fn isDirectory(root_dir: Compilation.Directory, path: []const u8) !bool { + var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) { + error.NotDir => return false, + else => return err, + }; + defer dir.close(); + return true; +} + /// Make a file system path identical independently of operating system path inconsistencies. /// This converts backslashes into forward slashes. 
fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { @@ -1110,6 +1126,25 @@ fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { return normalized; } +/// Make a OS-specific file system path +/// This performs the inverse operation of normalizePath, +/// converting forward slashes into backslashes on Windows +fn unnormalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { + const canonical_sep = '/'; + + const unnormalized = try arena.dupe(u8, fs_path); + if (fs.path.sep == canonical_sep) + return unnormalized; + + for (unnormalized) |*byte| { + switch (byte.*) { + canonical_sep => byte.* = fs.path.sep, + else => continue, + } + } + return unnormalized; +} + fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { defer wg.finish(); hashed_file.failure = hashFileFallible(dir, hashed_file); @@ -1172,36 +1207,18 @@ fn renameTmpIntoCache( } } -fn isTarAttachment(content_disposition: []const u8) bool { - const disposition_type_end = ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return false; - - var value_start = ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return false; - value_start += "filename".len; - if (content_disposition[value_start] == '*') { - value_start += 1; - } - if (content_disposition[value_start] != '=') return false; - value_start += 1; - - var value_end = mem.indexOfPos(u8, content_disposition, value_start, ";") orelse content_disposition.len; - if (content_disposition[value_end - 1] == '\"') { - value_end -= 1; - } - return ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz"); -} - -test "isTarAttachment" { - try std.testing.expect(isTarAttachment("attaChment; FILENAME=\"stuff.tar.gz\"; size=42")); - try std.testing.expect(isTarAttachment("attachment; filename*=\"stuff.tar.gz\"")); - try std.testing.expect(isTarAttachment("ATTACHMENT; filename=\"stuff.tar.gz\"")); - try 
std.testing.expect(isTarAttachment("attachment; FileName=\"stuff.tar.gz\"")); - try std.testing.expect(isTarAttachment("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); - - try std.testing.expect(!isTarAttachment("attachment FileName=\"stuff.tar.gz\"")); - try std.testing.expect(!isTarAttachment("attachment; FileName=\"stuff.tar\"")); - try std.testing.expect(!isTarAttachment("attachment; FileName\"stuff.gz\"")); - try std.testing.expect(!isTarAttachment("attachment; size=42")); - try std.testing.expect(!isTarAttachment("inline; size=42")); - try std.testing.expect(!isTarAttachment("FileName=\"stuff.tar.gz\"; attachment;")); - try std.testing.expect(!isTarAttachment("FileName=\"stuff.tar.gz\";")); +test "getAttachmentType" { + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attaChment; FILENAME=\"stuff.tar.gz\"; size=42")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; filename*=\"stuff.tar.gz\"")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("ATTACHMENT; filename=\"stuff.tar.xz\"")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar.xz\"")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); + + try std.testing.expect(ReadableResource.getAttachmentType("attachment FileName=\"stuff.tar.gz\"") == null); + try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar\"") == null); + try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName\"stuff.gz\"") == null); + try std.testing.expect(ReadableResource.getAttachmentType("attachment; size=42") == null); + try 
std.testing.expect(ReadableResource.getAttachmentType("inline; size=42") == null); + try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\"; attachment;") == null); + try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\";") == null); } From c6b92050055537d9edbdda10d5c215e5d9f327a0 Mon Sep 17 00:00:00 2001 From: AdamGoertz Date: Tue, 26 Sep 2023 18:56:28 -0400 Subject: [PATCH 3/5] Purge absolute paths and remove unneeded path processing No need to create paths with windows-specific path separators --- src/Package.zig | 69 +++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index b8b95a4f5139..a28531b2c080 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -499,13 +499,13 @@ const Report = struct { }; const FetchLocation = union(enum) { - /// The absolute path to a file or directory. + /// The relative path to a file or directory. /// This may be a file that requires unpacking (such as a .tar.gz), /// or the path to the root directory of a package. file: []const u8, http_request: std.Uri, - pub fn init(gpa: Allocator, directory: Compilation.Directory, dep: Manifest.Dependency, report: Report) !FetchLocation { + pub fn init(gpa: Allocator, dep: Manifest.Dependency, report: Report) !FetchLocation { switch (dep.location) { .url => |url| { const uri = std.Uri.parse(url) catch |err| switch (err) { @@ -518,18 +518,11 @@ const FetchLocation = union(enum) { return .{ .http_request = uri }; }, .path => |path| { - const unescaped = try std.Uri.unescapeString(gpa, path); - defer gpa.free(unescaped); - const unnormalized_path = try unnormalizePath(gpa, unescaped); - defer gpa.free(unnormalized_path); - - if (fs.path.isAbsolute(unnormalized_path)) { + if (fs.path.isAbsolute(path)) { return report.fail(dep.location_tok, "Absolute paths are not allowed. 
Use a relative path instead", .{}); } - const new_path = try fs.path.resolve(gpa, &.{ directory.path.?, unnormalized_path }); - - return .{ .file = new_path }; + return .{ .file = try gpa.dupe(u8, path) }; }, } } @@ -563,9 +556,9 @@ const FetchLocation = union(enum) { return .{ .path = owned_path, .resource = if (is_dir) - .{ .directory = try fs.openIterableDirAbsolute(file, .{}) } + .{ .directory = try root_dir.handle.openIterableDir(file, .{}) } else - .{ .file = try fs.openFileAbsolute(file, .{}) }, + .{ .file = try root_dir.handle.openFile(file, .{}) }, }; }, .http_request => |uri| { @@ -609,6 +602,7 @@ const ReadableResource = struct { rr: *ReadableResource, allocator: Allocator, thread_pool: *ThreadPool, + root_dir: Compilation.Directory, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, report: Report, @@ -618,7 +612,8 @@ const ReadableResource = struct { .directory => { return .{ .hash = computePathHash(rr.path), - .dir_path = try allocator.dupe(u8, rr.path), + .root_src_dir_path = try allocator.dupe(u8, rr.path), + .root_dir = root_dir, }; }, inline .file, .http_request => |*r| { @@ -684,15 +679,16 @@ const ReadableResource = struct { const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path}); - errdefer allocator.free(unpacked_path); + defer allocator.free(unpacked_path); const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path); - defer allocator.free(relative_unpacked_path); + errdefer allocator.free(relative_unpacked_path); try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path); return .{ .hash = actual_hash, - .dir_path = unpacked_path, + .root_src_dir_path = relative_unpacked_path, + .root_dir = global_cache_directory, }; }, } @@ -785,10 +781,11 @@ pub const PackageLocation = struct { /// For packages that require unpacking, this is the hash 
of the package contents. /// For directories, this is the hash of the absolute file path. hash: [Manifest.Hash.digest_length]u8, - dir_path: []const u8, + root_src_dir_path: []const u8, + root_dir: Compilation.Directory, pub fn deinit(pl: *PackageLocation, allocator: Allocator) void { - allocator.free(pl.dir_path); + allocator.free(pl.root_src_dir_path); pl.* = undefined; } }; @@ -934,13 +931,13 @@ fn fetchAndUnpack( pkg_prog_node.activate(); pkg_prog_node.context.refresh(); - var fetch_location = try FetchLocation.init(gpa, directory, dep.*, report); + var fetch_location = try FetchLocation.init(gpa, dep.*, report); defer fetch_location.deinit(gpa); var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep.*, report); defer readable_resource.deinit(gpa); - var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep.*, report, &pkg_prog_node); + var package_location = try readable_resource.unpack(gpa, thread_pool, directory, global_cache_directory, dep.*, report, &pkg_prog_node); defer package_location.deinit(gpa); const actual_hex = Manifest.hexDigest(package_location.hash); @@ -973,24 +970,23 @@ fn fetchAndUnpack( return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); } // Since directory dependencies don't provide a hash in build.zig.zon, - // set the hash here to be the hash of the absolute path to the dependency. + // set the hash here to be the hash of the path to the dependency. 
dep.hash = try gpa.dupe(u8, &actual_hex); } - const build_zig_path = try std.fs.path.join(gpa, &.{ package_location.dir_path, build_zig_basename }); + const build_zig_path = try std.fs.path.join(gpa, &.{ package_location.root_src_dir_path, build_zig_basename }); defer gpa.free(build_zig_path); - assert(fs.path.isAbsolute(build_zig_path)); - global_cache_directory.handle.access(build_zig_path, .{}) catch |err| switch (err) { + package_location.root_dir.handle.access(build_zig_path, .{}) catch |err| switch (err) { error.FileNotFound => { - const module = try create(gpa, package_location.dir_path, ""); + const module = try createWithDir(gpa, package_location.root_dir, package_location.root_src_dir_path, ""); try all_modules.put(gpa, actual_hex, .{ .non_zig_pkg = module }); return .{ .non_zig_pkg = module }; }, else => return err, }; - const module = try create(gpa, package_location.dir_path, build_zig_basename); + const module = try createWithDir(gpa, package_location.root_dir, package_location.root_src_dir_path, build_zig_basename); try all_modules.put(gpa, actual_hex, .{ .zig_pkg = module }); return .{ .zig_pkg = module }; } @@ -1126,25 +1122,6 @@ fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { return normalized; } -/// Make a OS-specific file system path -/// This performs the inverse operation of normalizePath, -/// converting forward slashes into backslashes on Windows -fn unnormalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { - const canonical_sep = '/'; - - const unnormalized = try arena.dupe(u8, fs_path); - if (fs.path.sep == canonical_sep) - return unnormalized; - - for (unnormalized) |*byte| { - switch (byte.*) { - canonical_sep => byte.* = fs.path.sep, - else => continue, - } - } - return unnormalized; -} - fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { defer wg.finish(); hashed_file.failure = hashFileFallible(dir, hashed_file); From 4594206e72475f1fb5900e986be039ebe54d43d9 Mon Sep 17 
00:00:00 2001 From: AdamGoertz Date: Wed, 27 Sep 2023 01:29:28 -0400 Subject: [PATCH 4/5] Fix diamond dependencies with directory packages --- src/Package.zig | 84 +++++++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index a28531b2c080..c0508c3de757 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -311,19 +311,34 @@ pub fn fetchAndAddDependencies( } for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, *dep| { + var fetch_location = try FetchLocation.init(gpa, dep.*, directory, report); + defer fetch_location.deinit(gpa); + + // Directories do not provide a hash in build.zig.zon. + // Hash the path to the module rather than its contents. + if (fetch_location == .directory) { + if (dep.hash != null) { + return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); + } + const hex_digest = Manifest.hexDigest(try computePathHash(gpa, directory, fetch_location.directory)); + dep.hash = try gpa.dupe(u8, &hex_digest); + } + const sub_mod, const found_existing = try getCachedPackage( arena, + fetch_location, global_cache_directory, dep.*, all_modules, root_prog_node, ) orelse .{ try fetchAndUnpack( + fetch_location, thread_pool, http_client, directory, global_cache_directory, - dep, + dep.*, report, all_modules, root_prog_node, @@ -503,9 +518,10 @@ const FetchLocation = union(enum) { /// This may be a file that requires unpacking (such as a .tar.gz), /// or the path to the root directory of a package. 
file: []const u8, + directory: []const u8, http_request: std.Uri, - pub fn init(gpa: Allocator, dep: Manifest.Dependency, report: Report) !FetchLocation { + pub fn init(gpa: Allocator, dep: Manifest.Dependency, root_dir: Compilation.Directory, report: Report) !FetchLocation { switch (dep.location) { .url => |url| { const uri = std.Uri.parse(url) catch |err| switch (err) { @@ -522,14 +538,22 @@ const FetchLocation = union(enum) { return report.fail(dep.location_tok, "Absolute paths are not allowed. Use a relative path instead", .{}); } - return .{ .file = try gpa.dupe(u8, path) }; + const is_dir = isDirectory(root_dir, path) catch |err| switch (err) { + error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{path}), + else => return err, + }; + + return if (is_dir) + .{ .directory = try gpa.dupe(u8, path) } + else + .{ .file = try gpa.dupe(u8, path) }; }, } } pub fn deinit(f: *FetchLocation, gpa: Allocator) void { switch (f.*) { - .file => |path| gpa.free(path), + inline .file, .directory => |path| gpa.free(path), .http_request => {}, } f.* = undefined; @@ -545,20 +569,19 @@ const FetchLocation = union(enum) { ) !ReadableResource { switch (f) { .file => |file| { - const is_dir = isDirectory(root_dir, file) catch |err| switch (err) { - error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{file}), - else => return err, - }; - const owned_path = try gpa.dupe(u8, file); errdefer gpa.free(owned_path); - return .{ .path = owned_path, - .resource = if (is_dir) - .{ .directory = try root_dir.handle.openIterableDir(file, .{}) } - else - .{ .file = try root_dir.handle.openFile(file, .{}) }, + .resource = .{ .file = try root_dir.handle.openFile(file, .{}) }, + }; + }, + .directory => |dir| { + const owned_path = try gpa.dupe(u8, dir); + errdefer gpa.free(owned_path); + return .{ + .path = owned_path, + .resource = .{ .directory = try root_dir.handle.openIterableDir(dir, .{}) }, }; }, .http_request => |uri| { @@ -611,7 
+634,7 @@ const ReadableResource = struct { switch (rr.resource) { .directory => { return .{ - .hash = computePathHash(rr.path), + .hash = try computePathHash(allocator, root_dir, rr.path), .root_src_dir_path = try allocator.dupe(u8, rr.path), .root_dir = root_dir, }; @@ -851,11 +874,19 @@ fn ProgressReader(comptime ReaderType: type) type { /// (i.e. whether or not its transitive dependencies have been fetched). fn getCachedPackage( gpa: Allocator, + fetch_location: FetchLocation, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, all_modules: *AllModules, root_prog_node: *std.Progress.Node, ) !?struct { DependencyModule, bool } { + // There is no fixed location to check for directory modules. + // Instead, check whether it is already listed in all_modules. + if (fetch_location == .directory) { + const hex_digest = dep.hash.?[0..hex_multihash_len]; + return if (all_modules.get(hex_digest.*)) |mod| .{ mod.?, true } else null; + } + const s = fs.path.sep_str; // Check if the expected_hash is already present in the global package // cache, and thereby avoid both fetching and unpacking. 
@@ -912,11 +943,12 @@ fn getCachedPackage( } fn fetchAndUnpack( + fetch_location: FetchLocation, thread_pool: *ThreadPool, http_client: *std.http.Client, directory: Compilation.Directory, global_cache_directory: Compilation.Directory, - dep: *Manifest.Dependency, + dep: Manifest.Dependency, report: Report, all_modules: *AllModules, root_prog_node: *std.Progress.Node, @@ -931,13 +963,10 @@ fn fetchAndUnpack( pkg_prog_node.activate(); pkg_prog_node.context.refresh(); - var fetch_location = try FetchLocation.init(gpa, dep.*, report); - defer fetch_location.deinit(gpa); - - var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep.*, report); + var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); defer readable_resource.deinit(gpa); - var package_location = try readable_resource.unpack(gpa, thread_pool, directory, global_cache_directory, dep.*, report, &pkg_prog_node); + var package_location = try readable_resource.unpack(gpa, thread_pool, directory, global_cache_directory, dep, report, &pkg_prog_node); defer package_location.deinit(gpa); const actual_hex = Manifest.hexDigest(package_location.hash); @@ -965,13 +994,6 @@ fn fetchAndUnpack( })); return error.PackageFetchFailed; } - } else { - if (dep.hash != null) { - return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); - } - // Since directory dependencies don't provide a hash in build.zig.zon, - // set the hash here to be the hash of the path to the dependency. - dep.hash = try gpa.dupe(u8, &actual_hex); } const build_zig_path = try std.fs.path.join(gpa, &.{ package_location.root_src_dir_path, build_zig_basename }); @@ -1089,9 +1111,11 @@ fn computePackageHash( } /// Compute the hash of a file path. 
-fn computePathHash(path: []const u8) [Manifest.Hash.digest_length]u8 { +fn computePathHash(gpa: Allocator, dir: Compilation.Directory, path: []const u8) ![Manifest.Hash.digest_length]u8 { + const resolved_path = try std.fs.path.resolve(gpa, &.{ dir.path.?, path }); + defer gpa.free(resolved_path); var hasher = Manifest.Hash.init(.{}); - hasher.update(path); + hasher.update(resolved_path); return hasher.finalResult(); } From e07e182fc1ef34a894e81fd360b47646c7c1a023 Mon Sep 17 00:00:00 2001 From: Adam Goertz Date: Thu, 28 Sep 2023 00:25:12 +0000 Subject: [PATCH 5/5] Extract logic for directory packages In addition to improving readability, this also fixes a subtle bug where the Progress node could display the wrong total number of packages to fetch. --- src/Package.zig | 219 +++++++++++++++++++++++------------------------- 1 file changed, 104 insertions(+), 115 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index c0508c3de757..790ddef253be 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -316,36 +316,30 @@ pub fn fetchAndAddDependencies( // Directories do not provide a hash in build.zig.zon. // Hash the path to the module rather than its contents. 
- if (fetch_location == .directory) { - if (dep.hash != null) { - return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); - } - const hex_digest = Manifest.hexDigest(try computePathHash(gpa, directory, fetch_location.directory)); - dep.hash = try gpa.dupe(u8, &hex_digest); - } - - const sub_mod, const found_existing = try getCachedPackage( - arena, - fetch_location, - global_cache_directory, - dep.*, - all_modules, - root_prog_node, - ) orelse .{ - try fetchAndUnpack( - fetch_location, - thread_pool, - http_client, - directory, + const sub_mod, const found_existing = if (fetch_location == .directory) + try getDirectoryModule(gpa, fetch_location, directory, all_modules, dep, report) + else + try getCachedPackage( + gpa, global_cache_directory, dep.*, - report, all_modules, root_prog_node, - name, - ), - false, - }; + ) orelse .{ + try fetchAndUnpack( + fetch_location, + thread_pool, + http_client, + directory, + global_cache_directory, + dep.*, + report, + all_modules, + root_prog_node, + name, + ), + false, + }; assert(dep.hash != null); @@ -576,14 +570,6 @@ const FetchLocation = union(enum) { .resource = .{ .file = try root_dir.handle.openFile(file, .{}) }, }; }, - .directory => |dir| { - const owned_path = try gpa.dupe(u8, dir); - errdefer gpa.free(owned_path); - return .{ - .path = owned_path, - .resource = .{ .directory = try root_dir.handle.openIterableDir(dir, .{}) }, - }; - }, .http_request => |uri| { var h = std.http.Headers{ .allocator = gpa }; defer h.deinit(); @@ -606,6 +592,7 @@ const FetchLocation = union(enum) { .resource = .{ .http_request = req }, }; }, + .directory => unreachable, // Directories do not require fetching } } }; @@ -614,7 +601,6 @@ const ReadableResource = struct { path: []const u8, resource: union(enum) { file: fs.File, - directory: fs.IterableDir, http_request: std.http.Client.Request, }, @@ -625,20 +611,12 @@ const ReadableResource = struct { rr: *ReadableResource, allocator: Allocator, thread_pool: 
*ThreadPool, - root_dir: Compilation.Directory, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, report: Report, pkg_prog_node: *std.Progress.Node, ) !PackageLocation { switch (rr.resource) { - .directory => { - return .{ - .hash = try computePathHash(allocator, root_dir, rr.path), - .root_src_dir_path = try allocator.dupe(u8, rr.path), - .root_dir = root_dir, - }; - }, inline .file, .http_request => |*r| { const s = fs.path.sep_str; const rand_int = std.crypto.random.int(u64); @@ -710,8 +688,7 @@ const ReadableResource = struct { return .{ .hash = actual_hash, - .root_src_dir_path = relative_unpacked_path, - .root_dir = global_cache_directory, + .relative_unpacked_path = relative_unpacked_path, }; }, } @@ -727,7 +704,6 @@ const ReadableResource = struct { // TODO: Handle case of chunked content-length .http_request => |req| return req.response.content_length, .file => |f| return (try f.metadata()).size(), - .directory => unreachable, } } @@ -737,7 +713,6 @@ const ReadableResource = struct { return fileTypeFromPath(rr.path) orelse return report.fail(dep.location_tok, "Unknown file type", .{}); }, - .directory => return error.IsDir, .http_request => |req| { const content_type = req.response.headers.getFirstValue("Content-Type") orelse return report.fail(dep.location_tok, "Missing 'Content-Type' header", .{}); @@ -793,7 +768,6 @@ const ReadableResource = struct { gpa.free(rr.path); switch (rr.resource) { .file => |file| file.close(), - .directory => |*dir| dir.close(), .http_request => |*req| req.deinit(), } rr.* = undefined; @@ -804,11 +778,10 @@ pub const PackageLocation = struct { /// For packages that require unpacking, this is the hash of the package contents. /// For directories, this is the hash of the absolute file path. 
hash: [Manifest.Hash.digest_length]u8, - root_src_dir_path: []const u8, - root_dir: Compilation.Directory, + relative_unpacked_path: []const u8, pub fn deinit(pl: *PackageLocation, allocator: Allocator) void { - allocator.free(pl.root_src_dir_path); + allocator.free(pl.relative_unpacked_path); pl.* = undefined; } }; @@ -874,19 +847,11 @@ fn ProgressReader(comptime ReaderType: type) type { /// (i.e. whether or not its transitive dependencies have been fetched). fn getCachedPackage( gpa: Allocator, - fetch_location: FetchLocation, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, all_modules: *AllModules, root_prog_node: *std.Progress.Node, ) !?struct { DependencyModule, bool } { - // There is no fixed location to check for directory modules. - // Instead, check whether it is already listed in all_modules. - if (fetch_location == .directory) { - const hex_digest = dep.hash.?[0..hex_multihash_len]; - return if (all_modules.get(hex_digest.*)) |mod| .{ mod.?, true } else null; - } - const s = fs.path.sep_str; // Check if the expected_hash is already present in the global package // cache, and thereby avoid both fetching and unpacking. 
@@ -912,34 +877,60 @@ fn getCachedPackage( root_prog_node.completeOne(); const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; + const basename = if (is_zig_mod) build_zig_basename else ""; + const pkg = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, basename); - const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path}); - errdefer gpa.free(build_root); - - const ptr = try gpa.create(Package); - errdefer gpa.destroy(ptr); + const module: DependencyModule = if (is_zig_mod) + .{ .zig_pkg = pkg } + else + .{ .non_zig_pkg = pkg }; - const owned_src_path = if (is_zig_mod) try gpa.dupe(u8, build_zig_basename) else ""; - errdefer gpa.free(owned_src_path); + try all_modules.put(gpa, hex_digest.*, module); + return .{ module, false }; + } - ptr.* = .{ - .root_src_directory = .{ - .path = build_root, - .handle = pkg_dir, - }, - .root_src_directory_owned = true, - .root_src_path = owned_src_path, - }; + return null; +} - gop.value_ptr.* = if (is_zig_mod) - .{ .zig_pkg = ptr } - else - .{ .non_zig_pkg = ptr }; +fn getDirectoryModule( + gpa: Allocator, + fetch_location: FetchLocation, + directory: Compilation.Directory, + all_modules: *AllModules, + dep: *Manifest.Dependency, + report: Report, +) !struct { DependencyModule, bool } { + assert(fetch_location == .directory); - return .{ gop.value_ptr.*.?, false }; + if (dep.hash != null) { + return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); } - return null; + const hash = try computePathHash(gpa, directory, fetch_location.directory); + const hex_digest = Manifest.hexDigest(hash); + dep.hash = try gpa.dupe(u8, &hex_digest); + + // There is no fixed location to check for directory modules. + // Instead, check whether it is already listed in all_modules. 
+ if (all_modules.get(hex_digest)) |mod| return .{ mod.?, true }; + + var pkg_dir = directory.handle.openDir(fetch_location.directory, .{}) catch |err| switch (err) { + error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{fetch_location.directory}), + else => |e| return e, + }; + defer pkg_dir.close(); + + const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; + const basename = if (is_zig_mod) build_zig_basename else ""; + + const pkg = try createWithDir(gpa, directory, fetch_location.directory, basename); + const module: DependencyModule = if (is_zig_mod) + .{ .zig_pkg = pkg } + else + .{ .non_zig_pkg = pkg }; + + try all_modules.put(gpa, hex_digest, module); + return .{ module, false }; } fn fetchAndUnpack( @@ -956,6 +947,8 @@ fn fetchAndUnpack( /// is only intended to be human-readable for progress reporting. name_for_prog: []const u8, ) !DependencyModule { + assert(fetch_location == .file or fetch_location == .http_request); + const gpa = http_client.allocator; var pkg_prog_node = root_prog_node.start(name_for_prog, 0); @@ -966,51 +959,47 @@ fn fetchAndUnpack( var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); defer readable_resource.deinit(gpa); - var package_location = try readable_resource.unpack(gpa, thread_pool, directory, global_cache_directory, dep, report, &pkg_prog_node); + var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node); defer package_location.deinit(gpa); const actual_hex = Manifest.hexDigest(package_location.hash); - if (readable_resource.resource != .directory) { - if (dep.hash) |h| { - if (!mem.eql(u8, h, &actual_hex)) { - return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ - h, actual_hex, - }); - } - } else { - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - - const eb = 
report.error_bundle; - const notes_len = 1; - try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ - .tok = dep.location_tok, - .off = 0, - .msg = "dependency is missing hash field", + if (dep.hash) |h| { + if (!mem.eql(u8, h, &actual_hex)) { + return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ + h, actual_hex, }); - const notes_start = try eb.reserveNotes(notes_len); - eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), - })); - return error.PackageFetchFailed; } + } else { + const file_path = try report.directory.join(gpa, &.{Manifest.basename}); + defer gpa.free(file_path); + + const eb = report.error_bundle; + const notes_len = 1; + try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ + .tok = dep.location_tok, + .off = 0, + .msg = "dependency is missing hash field", + }); + const notes_start = try eb.reserveNotes(notes_len); + eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), + })); + return error.PackageFetchFailed; } - const build_zig_path = try std.fs.path.join(gpa, &.{ package_location.root_src_dir_path, build_zig_basename }); + const build_zig_path = try fs.path.join(gpa, &.{ package_location.relative_unpacked_path, build_zig_basename }); defer gpa.free(build_zig_path); - package_location.root_dir.handle.access(build_zig_path, .{}) catch |err| switch (err) { - error.FileNotFound => { - const module = try createWithDir(gpa, package_location.root_dir, package_location.root_src_dir_path, ""); - try all_modules.put(gpa, actual_hex, .{ .non_zig_pkg = module }); - return .{ .non_zig_pkg = module }; - }, - else => return err, - }; + const is_zig_mod = if (global_cache_directory.handle.access(build_zig_path, .{})) |_| true else |_| false; + const basename = if (is_zig_mod) build_zig_basename else ""; + const pkg = try 
createWithDir(gpa, global_cache_directory, package_location.relative_unpacked_path, basename); + const module: DependencyModule = if (is_zig_mod) + .{ .zig_pkg = pkg } + else + .{ .non_zig_pkg = pkg }; - const module = try createWithDir(gpa, package_location.root_dir, package_location.root_src_dir_path, build_zig_basename); - try all_modules.put(gpa, actual_hex, .{ .zig_pkg = module }); - return .{ .zig_pkg = module }; + try all_modules.put(gpa, actual_hex, module); + return module; } fn unpackTarball(