Skip to content

implement new package hash format: $name-$semver-$hash #22994

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion build.zig.zon
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// The Zig compiler is not intended to be consumed as a package.
// The sole purpose of this manifest file is to test the compiler.
.{
.name = "zig",
.name = .zig,
.version = "0.0.0",
.dependencies = .{
.standalone_test_cases = .{
Expand All @@ -12,4 +12,5 @@
},
},
.paths = .{""},
.fingerprint = 0xc1ce108124179e16,
}
32 changes: 31 additions & 1 deletion doc/build.zig.zon.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ build.zig.

### `name`

String. Required.
Enum literal. Required.

This is the default name used by packages depending on this one. For example,
when a user runs `zig fetch --save <url>`, this field is used as the key in the
Expand All @@ -20,12 +20,42 @@ will stick with this provided value.
It is redundant to include "zig" in this name because it is already within the
Zig package namespace.

Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes.

Together with `fingerprint`, this represents a globally unique package identifier.

### `fingerprint`

Together with `name`, this represents a globally unique package identifier. This
field is auto-initialized by the toolchain when the package is first created,
and then *never changes*. This allows Zig to unambiguously detect when one
package is an updated version of another.

When forking a Zig project, this fingerprint should be regenerated if the upstream
project is still maintained. Otherwise, the fork is *hostile*, attempting to
take control over the original project's identity. The fingerprint can be regenerated
by deleting the field and running `zig build`.

This 64-bit integer is the combination of a 32-bit id component and a 32-bit
checksum.

The id component within the fingerprint has these restrictions:

`0x00000000` is reserved for legacy packages.

`0xffffffff` is reserved to represent "naked" packages.

The checksum is computed from `name` and serves to protect Zig users from
accidental id collisions.

### `version`

String. Required.

[semver](https://semver.org/)

Limited to 32 bytes.

### `minimum_zig_version`

String. Optional.
Expand Down
6 changes: 3 additions & 3 deletions lib/init/build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void {
// Modules can depend on one another using the `std.Build.Module.addImport` function.
// This is what allows Zig source code to use `@import("foo")` where 'foo' is not a
// file path. In this case, we set up `exe_mod` to import `lib_mod`.
exe_mod.addImport("$_lib", lib_mod);
exe_mod.addImport(".NAME_lib", lib_mod);

// Now, we will create a static library based on the module we created above.
// This creates a `std.Build.Step.Compile`, which is the build step responsible
// for actually invoking the compiler.
const lib = b.addLibrary(.{
.linkage = .static,
.name = "$",
.name = ".NAME",
.root_module = lib_mod,
});

Expand All @@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void {
// This creates another `std.Build.Step.Compile`, but this one builds an executable
// rather than a static library.
const exe = b.addExecutable(.{
.name = "$",
.name = ".NAME",
.root_module = exe_mod,
});

Expand Down
20 changes: 19 additions & 1 deletion lib/init/build.zig.zon
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,30 @@
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = "$",
.name = .LITNAME,

// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",

// Together with name, this represents a globally unique package
// identifier. This field is generated by the Zig toolchain when the
// package is first created, and then *never changes*. This allows
// unambiguous detection of one package being an updated version of
// another.
//
// When forking a Zig project, this id should be regenerated (delete the
// field and run `zig build`) if the upstream project is still maintained.
// Otherwise, the fork is *hostile*, attempting to take control over the
// original project's identity. Thus it is recommended to leave the comment
// on the following line intact, so that it shows up in code reviews that
// modify the field.
.fingerprint = .FINGERPRINT, // Changing this has security and trust implications.

// Tracks the earliest Zig version that the package considers to be a
// supported use case.
.minimum_zig_version = ".ZIGVER",

// This field is optional.
// This is currently advisory only; Zig does not yet do anything
// with this value.
Expand Down
2 changes: 1 addition & 1 deletion lib/init/src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ test "fuzz example" {
const std = @import("std");

/// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details.
const lib = @import("$_lib");
const lib = @import(".NAME_lib");
7 changes: 0 additions & 7 deletions lib/std/array_list.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2250,10 +2250,3 @@ test "return OutOfMemory when capacity would exceed maximum usize integer value"
try testing.expectError(error.OutOfMemory, list.ensureUnusedCapacity(2));
}
}

test "ArrayListAligned with non-native alignment compiles unusedCapabitySlice" {
var list = ArrayListAligned(u8, 4).init(testing.allocator);
defer list.deinit();
try list.appendNTimes(1, 4);
_ = list.unusedCapacitySlice();
}
192 changes: 192 additions & 0 deletions src/Package.zig
Original file line number Diff line number Diff line change
@@ -1,8 +1,200 @@
const std = @import("std");
const assert = std.debug.assert;

pub const Module = @import("Package/Module.zig");
pub const Fetch = @import("Package/Fetch.zig");
pub const build_zig_basename = "build.zig";
pub const Manifest = @import("Package/Manifest.zig");

/// Size in bytes of a binary multihash as used by the legacy hash format:
/// one byte for the hash function code, one byte for the digest length,
/// followed by the digest itself.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
/// Size of the legacy multihash once hex-encoded (two characters per byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
/// Fixed-size buffer holding a hex-encoded legacy multihash.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;

/// Package fingerprint stored in `build.zig.zon`: a 32-bit id paired with a
/// 32-bit checksum of the package name, packed into a single `u64`.
///
/// The id values `0x00000000` (legacy packages) and `0xffffffff` ("naked"
/// packages) are reserved and never considered valid.
pub const Fingerprint = packed struct(u64) {
    id: u32,
    checksum: u32,

    /// Creates a fresh fingerprint for `name`: a random id that avoids both
    /// reserved values, plus the CRC-32 of `name` as checksum.
    pub fn generate(name: []const u8) Fingerprint {
        // Upper bound is exclusive, so the id lands in 1...0xfffffffe.
        const fresh_id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff);
        const name_crc = std.hash.Crc32.hash(name);
        return .{ .id = fresh_id, .checksum = name_crc };
    }

    /// Returns whether this fingerprint is acceptable for a package called
    /// `name`: the id must not be reserved and the checksum must equal the
    /// CRC-32 of `name`.
    pub fn validate(n: Fingerprint, name: []const u8) bool {
        const id_is_reserved = n.id == 0x00000000 or n.id == 0xffffffff;
        if (id_is_reserved) return false;
        return n.checksum == std.hash.Crc32.hash(name);
    }

    /// Returns the fingerprint as the raw 64-bit integer written in the manifest.
    pub fn int(n: Fingerprint) u64 {
        return @as(u64, @bitCast(n));
    }
};

/// A user-readable, file system safe hash that identifies an exact package
/// snapshot, including file contents.
///
/// The hash is not only to prevent collisions but must resist attacks where
/// the adversary fully controls the contents being hashed. Thus, it contains
/// a full SHA-256 digest.
///
/// This data structure can be used to store the legacy hash format too. Legacy
/// hash format is scheduled to be removed after 0.14.0 is tagged.
///
/// There's also a third way this structure is used. When using path rather than
/// hash, a unique hash is still needed, so one is computed based on the path.
pub const Hash = struct {
    /// Maximum size of a package hash. Unused bytes at the end are
    /// filled with zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
    ///
    /// Name (up to 32 bytes), '-', version (up to 32 bytes), '-', then the
    /// base64 encoding of 32 + 32 + 200 bits at 6 bits per character.
    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;

    /// Stores `s` verbatim, padding the remainder of the buffer with zeroes.
    /// Asserts that `s` is at most `max_len` bytes long.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored bytes with the trailing zero padding removed.
    /// A hash consisting only of padding yields an empty slice.
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        // Scan backwards for the last non-zero byte. The `end > 0` guard keeps
        // an all-zero buffer (e.g. `fromSlice("")`) from underflowing `end`.
        while (end > 0) {
            end -= 1;
            if (ph.bytes[end] != 0) return ph.bytes[0 .. end + 1];
        }
        return ph.bytes[0..0];
    }

    test toSlice {
        const empty: Hash = .fromSlice("");
        try std.testing.expectEqualStrings("", empty.toSlice());
        const short: Hash = .fromSlice("abc");
        try std.testing.expectEqualStrings("abc", short.toSlice());
    }

    /// Compares the full fixed-size buffers, padding included.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    ///
    /// A legacy hash starts with the hex-encoded multihash function code,
    /// has exactly `multihash_hex_digest_len` characters, and contains no '-'
    /// (which the new format always uses as separator).
    pub fn isOld(h: *const Hash) bool {
        if (h.bytes.len < 2) return false;
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    /// Produces "$name-$semver-$hashplus".
    /// * name is the name field from build.zig.zon, asserted to be at most 32
    ///   bytes and assumed to be a valid zig identifier
    /// * semver is the version field from build.zig.zon, asserted to be at
    ///   most 32 bytes
    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 Package ID
    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
        assert(name.len <= 32);
        assert(ver.len <= 32);
        var result: Hash = undefined;
        // Build the text directly inside the result buffer; the length asserts
        // above guarantee that everything fits within `max_len`.
        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver);
        buf.appendAssumeCapacity('-');
        var hashplus: [33]u8 = undefined;
        std.mem.writeInt(u32, hashplus[0..4], id, .little);
        std.mem.writeInt(u32, hashplus[4..8], size, .little);
        hashplus[8..].* = digest[0..25].*;
        // 33 bytes encode to exactly 44 base64 characters without padding.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces a unique hash based on the path provided. The result should
    /// not be user-visible.
    ///
    /// When `is_global` is set the result is prefixed with '/'. A path that
    /// fits in the remaining space is stored verbatim; otherwise the
    /// hex-encoded SHA-256 digest of the path is stored instead.
    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
        var result: Hash = .{ .bytes = @splat(0) };
        var i: usize = 0;
        if (is_global) {
            result.bytes[0] = '/';
            i += 1;
        }
        if (i + sub_path.len <= result.bytes.len) {
            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
            return result;
        }
        var bin_digest: [Algo.digest_length]u8 = undefined;
        Algo.hash(sub_path, &bin_digest, .{});
        // The hex digest is shorter than the remaining buffer, so printing
        // cannot run out of space.
        _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable;
        return result;
    }
};

/// Hash function identifiers used as the leading code of a legacy multihash.
/// NOTE(review): the values look like they come from the multiformats
/// multicodec table — confirm against that table before adding entries.
///
/// The enum is non-exhaustive, so codes not listed here can still be
/// represented (for example when converting a parsed integer).
pub const MultihashFunction = enum(u16) {
identity = 0x00,
sha1 = 0x11,
@"sha2-256" = 0x12,
@"sha2-512" = 0x13,
@"sha3-512" = 0x14,
@"sha3-384" = 0x15,
@"sha3-256" = 0x16,
@"sha3-224" = 0x17,
@"sha2-384" = 0x20,
@"sha2-256-trunc254-padded" = 0x1012,
@"sha2-224" = 0x1013,
@"sha2-512-224" = 0x1014,
@"sha2-512-256" = 0x1015,
@"blake2b-256" = 0xb220,
_,
};

/// The multihash function code corresponding to `Hash.Algo`.
/// Only SHA-256 is mapped; any other algorithm hits `unreachable` while this
/// constant is being evaluated.
pub const multihash_function: MultihashFunction =
    if (Hash.Algo == std.crypto.hash.sha2.Sha256) .@"sha2-256" else unreachable;

/// Encodes `digest` in the legacy multihash hex form: two hex characters for
/// the hash function code, two for the digest length, then two per digest
/// byte, all lowercase.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const charset = std.fmt.hex_charset;
    const func_code = @intFromEnum(multihash_function);
    const digest_len = Hash.Algo.digest_length;

    var out: MultiHashHexDigest = undefined;

    // Header: function code and digest length, one byte (two characters) each.
    out[0] = charset[func_code >> 4];
    out[1] = charset[func_code & 15];
    out[2] = charset[digest_len >> 4];
    out[3] = charset[digest_len & 15];

    // Payload: each digest byte becomes a high-nibble/low-nibble pair.
    for (digest, 0..) |byte, index| {
        const pair = out[4 + index * 2 ..][0..2];
        pair[0] = charset[byte >> 4];
        pair[1] = charset[byte & 15];
    }
    return out;
}

comptime {
// multiHashHexDigest writes the function code and the digest length as a
// single byte each instead of going through general uleb128 encoding.
// Assert here that both values are small enough to be contained in the
// one-byte encoding.
assert(@intFromEnum(multihash_function) < 127);
assert(Hash.Algo.digest_length < 127);
}

test Hash {
    // Only the first 25 bytes of the digest are included in the encoded hash.
    const digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };
    const expected = "nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved";

    const hash: Hash = .init(digest, "nasm", "2.16.1-3", 0xcafebabe, 10 * 1024 * 1024);
    try std.testing.expectEqualStrings(expected, hash.toSlice());
}

test {
// Reference Fetch from a test block — presumably so that the tests declared
// in Package/Fetch.zig are included when this file is tested.
_ = Fetch;
}
Loading