ziglang · andrewrk · Feb 28, 2025 · Feb 23, 2025 · Feb 23, 2025 · Feb 24, 2025
diff --git a/build.zig.zon b/build.zig.zon
@@ -1,7 +1,7 @@
 // The Zig compiler is not intended to be consumed as a package.
 // The sole purpose of this manifest file is to test the compiler.
 .{
-    .name = "zig",
+    .name = .zig,
     .version = "0.0.0",
     .dependencies = .{
         .standalone_test_cases = .{
@@ -12,4 +12,5 @@
         },
     },
     .paths = .{""},
+    .fingerprint = 0xc1ce108124179e16,
 }
diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md
@@ -10,7 +10,7 @@ build.zig.
 
 ### `name`
 
-String. Required.
+Enum literal. Required.
 
 This is the default name used by packages depending on this one. For example,
 when a user runs `zig fetch --save <url>`, this field is used as the key in the
@@ -20,12 +20,42 @@ will stick with this provided value.
 It is redundant to include "zig" in this name because it is already within the
 Zig package namespace.
 
+Must be a valid bare Zig identifier (one that does not need `@"..."` quoting), limited to 32 bytes.
+
+Together with `fingerprint`, this represents a globally unique package identifier.
+
+### `fingerprint`
+
+Together with `name`, this represents a globally unique package identifier. This
+field is auto-initialized by the toolchain when the package is first created,
+and then *never changes*. This allows Zig to unambiguously detect when one
+package is an updated version of another.
+
+When forking a Zig project, this fingerprint should be regenerated if the upstream
+project is still maintained. A fork that keeps the upstream fingerprint is *hostile*,
+attempting to take control over the original project's identity. The fingerprint can
+be regenerated by deleting the field and running `zig build`.
+
+This 64-bit integer is the combination of a 32-bit id component and a 32-bit
+checksum.
+
+The id component within the fingerprint has these restrictions:
+
+`0x00000000` is reserved for legacy packages.
+
+`0xffffffff` is reserved to represent "naked" packages.
+
+The checksum is computed from `name` and serves to protect Zig users from
+accidental id collisions.
+
 ### `version`
 
 String. Required.
 
 [semver](https://semver.org/)
 
+Limited to 32 bytes.
+
 ### `minimum_zig_version`
 
 String. Optional.

diff --git a/lib/init/build.zig b/lib/init/build.zig
@@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void {
     // Modules can depend on one another using the `std.Build.Module.addImport` function.
     // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a
     // file path. In this case, we set up `exe_mod` to import `lib_mod`.
-    exe_mod.addImport("$_lib", lib_mod);
+    exe_mod.addImport(".NAME_lib", lib_mod);
 
     // Now, we will create a static library based on the module we created above.
     // This creates a `std.Build.Step.Compile`, which is the build step responsible
     // for actually invoking the compiler.
     const lib = b.addLibrary(.{
         .linkage = .static,
-        .name = "$",
+        .name = ".NAME",
         .root_module = lib_mod,
     });
 
@@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void {
     // This creates another `std.Build.Step.Compile`, but this one builds an executable
     // rather than a static library.
     const exe = b.addExecutable(.{
-        .name = "$",
+        .name = ".NAME",
         .root_module = exe_mod,
     });
 

diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon
@@ -6,12 +6,30 @@
     //
     // It is redundant to include "zig" in this name because it is already
     // within the Zig package namespace.
-    .name = "$",
+    .name = .LITNAME,
 
     // This is a [Semantic Version](https://semver.org/).
     // In a future version of Zig it will be used for package deduplication.
     .version = "0.0.0",
 
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // A fork that keeps the upstream id is *hostile*, attempting to take
+    // control over the original project's identity. Thus it is recommended to
+    // leave the comment on the following line intact, so that it shows up in
+    // code reviews that modify the field.
+    .fingerprint = .FINGERPRINT, // Changing this has security and trust implications.
+
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = ".ZIGVER",
+
     // This field is optional.
     // This is currently advisory only; Zig does not yet do anything
     // with this value.

diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig
@@ -43,4 +43,4 @@ test "fuzz example" {
 const std = @import("std");
 
 /// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details.
-const lib = @import("$_lib");
+const lib = @import(".NAME_lib");
diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig
@@ -2250,10 +2250,3 @@ test "return OutOfMemory when capacity would exceed maximum usize integer value"
         try testing.expectError(error.OutOfMemory, list.ensureUnusedCapacity(2));
     }
 }
-
-test "ArrayListAligned with non-native alignment compiles unusedCapabitySlice" {
-    var list = ArrayListAligned(u8, 4).init(testing.allocator);
-    defer list.deinit();
-    try list.appendNTimes(1, 4);
-    _ = list.unusedCapacitySlice();
-}
diff --git a/src/Package.zig b/src/Package.zig
@@ -1,8 +1,200 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
 pub const Module = @import("Package/Module.zig");
 pub const Fetch = @import("Package/Fetch.zig");
 pub const build_zig_basename = "build.zig";
 pub const Manifest = @import("Package/Manifest.zig");
 
+pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
+pub const multihash_hex_digest_len = 2 * multihash_len;
+pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
+
+pub const Fingerprint = packed struct(u64) {
+    id: u32,
+    checksum: u32,
+
+    pub fn generate(name: []const u8) Fingerprint {
+        return .{
+            .id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff),
+            .checksum = std.hash.Crc32.hash(name),
+        };
+    }
+
+    pub fn validate(n: Fingerprint, name: []const u8) bool {
+        switch (n.id) {
+            0x00000000, 0xffffffff => return false,
+            else => return std.hash.Crc32.hash(name) == n.checksum,
+        }
+    }
+
+    pub fn int(n: Fingerprint) u64 {
+        return @bitCast(n);
+    }
+};
+
+/// A user-readable, file system safe hash that identifies an exact package
+/// snapshot, including file contents.
+///
+/// The hash is not only to prevent collisions but must resist attacks where
+/// the adversary fully controls the contents being hashed. Thus, it contains
+/// a full SHA-256 digest.
+///
+/// This data structure can be used to store the legacy hash format too. Legacy
+/// hash format is scheduled to be removed after 0.14.0 is tagged.
+///
+/// There's also a third way this structure is used. When using path rather than
+/// hash, a unique hash is still needed, so one is computed based on the path.
+pub const Hash = struct {
+    /// Maximum size of a package hash. Unused bytes at the end are
+    /// filled with zeroes.
+    bytes: [max_len]u8,
+
+    pub const Algo = std.crypto.hash.sha2.Sha256;
+    pub const Digest = [Algo.digest_length]u8;
+
+    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
+    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;
+
+    pub fn fromSlice(s: []const u8) Hash {
+        assert(s.len <= max_len);
+        var result: Hash = undefined;
+        @memcpy(result.bytes[0..s.len], s);
+        @memset(result.bytes[s.len..], 0);
+        return result;
+    }
+
+    /// Returns the stored hash with its trailing zero padding removed. An
+    /// all-zero hash yields an empty slice rather than underflowing the index.
+    pub fn toSlice(ph: *const Hash) []const u8 {
+        var end: usize = ph.bytes.len;
+        while (end > 0 and ph.bytes[end - 1] == 0) end -= 1;
+        return ph.bytes[0..end];
+    }
+
+    pub fn eql(a: *const Hash, b: *const Hash) bool {
+        return std.mem.eql(u8, &a.bytes, &b.bytes);
+    }
+
+    /// Distinguishes whether the legacy multihash format is being stored here.
+    pub fn isOld(h: *const Hash) bool {
+        if (h.bytes.len < 2) return false;
+        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
+        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
+        if (h.toSlice().len != multihash_hex_digest_len) return false;
+        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
+    }
+
+    test isOld {
+        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
+        try std.testing.expect(h.isOld());
+    }
+
+    /// Produces "$name-$semver-$hashplus".
+    /// * name is the name field from build.zig.zon, asserted to be at most 32
+    ///   bytes and assumed to be a valid zig identifier
+    /// * semver is the version field from build.zig.zon, asserted to be at
+    ///   most 32 bytes
+    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
+    ///   it filesystem safe:
+    ///   - (4 bytes) LE u32 Package ID
+    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
+    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
+    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
+        assert(name.len <= 32);
+        assert(ver.len <= 32);
+        var result: Hash = undefined;
+        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
+        buf.appendSliceAssumeCapacity(name);
+        buf.appendAssumeCapacity('-');
+        buf.appendSliceAssumeCapacity(ver);
+        buf.appendAssumeCapacity('-');
+        var hashplus: [33]u8 = undefined;
+        std.mem.writeInt(u32, hashplus[0..4], id, .little);
+        std.mem.writeInt(u32, hashplus[4..8], size, .little);
+        hashplus[8..].* = digest[0..25].*;
+        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
+        @memset(buf.unusedCapacitySlice(), 0);
+        return result;
+    }
+
+    /// Produces a unique hash based on the path provided. The result should
+    /// not be user-visible.
+    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
+        var result: Hash = .{ .bytes = @splat(0) };
+        var i: usize = 0;
+        if (is_global) {
+            result.bytes[0] = '/';
+            i += 1;
+        }
+        if (i + sub_path.len <= result.bytes.len) {
+            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
+            return result;
+        }
+        var bin_digest: [Algo.digest_length]u8 = undefined;
+        Algo.hash(sub_path, &bin_digest, .{});
+        _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable;
+        return result;
+    }
+};
+
+pub const MultihashFunction = enum(u16) {
+    identity = 0x00,
+    sha1 = 0x11,
+    @"sha2-256" = 0x12,
+    @"sha2-512" = 0x13,
+    @"sha3-512" = 0x14,
+    @"sha3-384" = 0x15,
+    @"sha3-256" = 0x16,
+    @"sha3-224" = 0x17,
+    @"sha2-384" = 0x20,
+    @"sha2-256-trunc254-padded" = 0x1012,
+    @"sha2-224" = 0x1013,
+    @"sha2-512-224" = 0x1014,
+    @"sha2-512-256" = 0x1015,
+    @"blake2b-256" = 0xb220,
+    _,
+};
+
+pub const multihash_function: MultihashFunction = switch (Hash.Algo) {
+    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
+    else => unreachable,
+};
+
+pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
+    const hex_charset = std.fmt.hex_charset;
+
+    var result: MultiHashHexDigest = undefined;
+
+    result[0] = hex_charset[@intFromEnum(multihash_function) >> 4];
+    result[1] = hex_charset[@intFromEnum(multihash_function) & 15];
+
+    result[2] = hex_charset[Hash.Algo.digest_length >> 4];
+    result[3] = hex_charset[Hash.Algo.digest_length & 15];
+
+    for (digest, 0..) |byte, i| {
+        result[4 + i * 2] = hex_charset[byte >> 4];
+        result[5 + i * 2] = hex_charset[byte & 15];
+    }
+    return result;
+}
+
+comptime {
+    // We avoid unnecessary uleb128 code in multiHashHexDigest by asserting here
+    // the values are small enough to be contained in the one-byte encoding.
+    assert(@intFromEnum(multihash_function) < 127);
+    assert(Hash.Algo.digest_length < 127);
+}
+
+test Hash {
+    const example_digest: Hash.Digest = .{
+        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
+        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
+    };
+    const result: Hash = .init(example_digest, "nasm", "2.16.1-3", 0xcafebabe, 10 * 1024 * 1024);
+    try std.testing.expectEqualStrings("nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved", result.toSlice());
+}
+
 test {
     _ = Fetch;
 }