Skip to content

Commit 3bcefa5

Browse files
committed
cmd/link/internal/ld: rewrite LC_UUID for darwin external links
When building Go binaries using external linking, rewrite the LC_UUID Macho load command to replace the content placed there by the external linker, so as to ensure that we get reproducible builds. Updates #64947. Change-Id: I263a89d1a067807404febbc801d4dade33bc3288 Reviewed-on: https://go-review.googlesource.com/c/go/+/586079 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent f9ba2cf commit 3bcefa5

File tree

3 files changed

+154
-19
lines changed

3 files changed

+154
-19
lines changed

src/cmd/link/internal/ld/lib.go

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,8 @@ INSERT AFTER .debug_types;
13561356
return path
13571357
}
13581358

1359+
// machoUpdateFunc is the signature of a callback that produces an
// updated copy of a Mach-O executable. It receives the link context,
// the already-opened input executable (exef), its parsed form (exem),
// and the path (outexe) to which the rewritten binary is written, and
// returns any error encountered while doing so.
type machoUpdateFunc func(ctxt *Link, exef *os.File, exem *macho.File, outexe string) error
1360+
13591361
// archive builds a .a archive from the hostobj object files.
13601362
func (ctxt *Link) archive() {
13611363
if ctxt.BuildMode != BuildModeCArchive {
@@ -1969,6 +1971,30 @@ func (ctxt *Link) hostlink() {
19691971
ctxt.Logf("%s", out)
19701972
}
19711973

1974+
// Helper for updating a Macho binary in some way (shared between
1975+
// dwarf combining and UUID update).
1976+
updateMachoOutFile := func(op string, updateFunc machoUpdateFunc) {
1977+
// For os.Rename to work reliably, must be in same directory as outfile.
1978+
rewrittenOutput := *flagOutfile + "~"
1979+
exef, err := os.Open(*flagOutfile)
1980+
if err != nil {
1981+
Exitf("%s: %s failed: %v", os.Args[0], op, err)
1982+
}
1983+
defer exef.Close()
1984+
exem, err := macho.NewFile(exef)
1985+
if err != nil {
1986+
Exitf("%s: parsing Mach-O header failed: %v", os.Args[0], err)
1987+
}
1988+
if err := updateFunc(ctxt, exef, exem, rewrittenOutput); err != nil {
1989+
Exitf("%s: %s failed: %v", os.Args[0], op, err)
1990+
}
1991+
os.Remove(*flagOutfile)
1992+
if err := os.Rename(rewrittenOutput, *flagOutfile); err != nil {
1993+
Exitf("%s: %v", os.Args[0], err)
1994+
}
1995+
}
1996+
1997+
uuidUpdated := false
19721998
if combineDwarf {
19731999
// Find "dsymutils" and "strip" tools using CC --print-prog-name.
19742000
var cc []string
@@ -2028,24 +2054,17 @@ func (ctxt *Link) hostlink() {
20282054
if _, err := os.Stat(dsym); os.IsNotExist(err) {
20292055
return
20302056
}
2031-
// For os.Rename to work reliably, must be in same directory as outfile.
2032-
combinedOutput := *flagOutfile + "~"
2033-
exef, err := os.Open(*flagOutfile)
2034-
if err != nil {
2035-
Exitf("%s: combining dwarf failed: %v", os.Args[0], err)
2036-
}
2037-
defer exef.Close()
2038-
exem, err := macho.NewFile(exef)
2039-
if err != nil {
2040-
Exitf("%s: parsing Mach-O header failed: %v", os.Args[0], err)
2041-
}
2042-
if err := machoCombineDwarf(ctxt, exef, exem, dsym, combinedOutput); err != nil {
2043-
Exitf("%s: combining dwarf failed: %v", os.Args[0], err)
2044-
}
2045-
os.Remove(*flagOutfile)
2046-
if err := os.Rename(combinedOutput, *flagOutfile); err != nil {
2047-
Exitf("%s: %v", os.Args[0], err)
2048-
}
2057+
updateMachoOutFile("combining dwarf",
2058+
func(ctxt *Link, exef *os.File, exem *macho.File, outexe string) error {
2059+
return machoCombineDwarf(ctxt, exef, exem, dsym, outexe)
2060+
})
2061+
uuidUpdated = true
2062+
}
2063+
if ctxt.IsDarwin() && !uuidUpdated && *flagBuildid != "" {
2064+
updateMachoOutFile("rewriting uuid",
2065+
func(ctxt *Link, exef *os.File, exem *macho.File, outexe string) error {
2066+
return machoRewriteUuid(ctxt, exef, exem, outexe)
2067+
})
20492068
}
20502069
if ctxt.NeedCodeSign() {
20512070
err := machoCodeSign(ctxt, *flagOutfile)

src/cmd/link/internal/ld/macho_combine_dwarf.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ type encryptionInfoCmd struct {
4444
CryptId uint32
4545
}
4646

47+
// uuidCmd is the in-memory form of a Mach-O LC_UUID load command as
// read and written through loadCmdReader. Cmd holds the load command
// type and Uuid holds the 16-byte UUID payload that is overwritten
// with the value derived from the Go build ID. Len is presumably the
// total size of the command in bytes (NOTE(review): confirm against
// the Mach-O uuid_command definition). The field order and types are
// significant because the struct is read and written as raw bytes.
type uuidCmd struct {
48+
Cmd macho.LoadCmd
49+
Len uint32
50+
Uuid [16]byte
51+
}
52+
4753
type loadCmdReader struct {
4854
offset, next int64
4955
f *os.File
@@ -227,8 +233,15 @@ func machoCombineDwarf(ctxt *Link, exef *os.File, exem *macho.File, dsym, outexe
227233
err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &linkEditDataCmd{}, "DataOff")
228234
case LC_ENCRYPTION_INFO, LC_ENCRYPTION_INFO_64:
229235
err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &encryptionInfoCmd{}, "CryptOff")
236+
case LC_UUID:
237+
var u uuidCmd
238+
err = reader.ReadAt(0, &u)
239+
if err == nil {
240+
copy(u.Uuid[:], uuidFromGoBuildId(*flagBuildid))
241+
err = reader.WriteAt(0, &u)
242+
}
230243
case macho.LoadCmdDylib, macho.LoadCmdThread, macho.LoadCmdUnixThread,
231-
LC_PREBOUND_DYLIB, LC_UUID, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_SOURCE_VERSION,
244+
LC_PREBOUND_DYLIB, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_SOURCE_VERSION,
232245
LC_MAIN, LC_LOAD_DYLINKER, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_RPATH, LC_ID_DYLIB,
233246
LC_SYMSEG, LC_LOADFVMLIB, LC_IDFVMLIB, LC_IDENT, LC_FVMFILE, LC_PREPAGE, LC_ID_DYLINKER,
234247
LC_ROUTINES, LC_SUB_FRAMEWORK, LC_SUB_UMBRELLA, LC_SUB_CLIENT, LC_SUB_LIBRARY, LC_TWOLEVEL_HINTS,
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package ld
6+
7+
// This file provides helper functions for updating/rewriting the UUID
8+
// load command within a Go binary generated on Darwin using
9+
// external linking. Why is it necessary to update the UUID load
10+
// command? See issue #64947 for more detail, but the short answer is
11+
// that newer versions of the macOS toolchain (the newer linker in
12+
// particular) appear to compute the UUID based not just on the
13+
// content of the object files being linked but also on things like
14+
// the timestamps/paths of the objects; this makes it
15+
// difficult/impossible to support reproducible builds. Since we try
16+
// hard to maintain build reproducibility for Go, the APIs here
17+
// compute a new UUID (based on the Go build ID) and write it to the
18+
// final executable generated by the external linker.
19+
20+
import (
21+
"cmd/internal/notsha256"
22+
"debug/macho"
23+
"io"
24+
"os"
25+
"unsafe"
26+
)
27+
28+
// uuidFromGoBuildId hashes the Go build ID and returns a slice of 16
29+
// bytes suitable for use as the payload in a Macho LC_UUID load
30+
// command.
31+
func uuidFromGoBuildId(buildID string) []byte {
32+
if buildID == "" {
33+
return make([]byte, 16)
34+
}
35+
hashedBuildID := notsha256.Sum256([]byte(buildID))
36+
rv := hashedBuildID[:16]
37+
38+
// RFC 4122 conformance (see RFC 4122 Sections 4.2.2, 4.1.3). We
39+
// want the "version" of this UUID to appear as 'hashed' as opposed
40+
// to random or time-based. This is something of a fiction since
41+
// we're not actually hashing using MD5 or SHA1, but it seems better
42+
// to use this UUID flavor than any of the others. This is similar
43+
// to how other linkers handle this (for example this code in lld:
44+
// https://github.com/llvm/llvm-project/blob/2a3a79ce4c2149d7787d56f9841b66cacc9061d0/lld/MachO/Writer.cpp#L524).
45+
rv[6] &= 0xcf
46+
rv[6] |= 0x30
47+
rv[8] &= 0x3f
48+
rv[8] |= 0xc0
49+
50+
return rv
51+
}
52+
53+
// machoRewriteUuid copies over the contents of the Macho executable
54+
// exef into the output file outexe, and in the process updates the
55+
// LC_UUID command to a new value recomputed from the Go build id.
56+
func machoRewriteUuid(ctxt *Link, exef *os.File, exem *macho.File, outexe string) error {
57+
outf, err := os.OpenFile(outexe, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0755)
58+
if err != nil {
59+
return err
60+
}
61+
defer outf.Close()
62+
63+
// Copy over the file.
64+
if _, err := io.Copy(outf, exef); err != nil {
65+
return err
66+
}
67+
68+
// Locate the portion of the binary containing the load commands.
69+
cmdOffset := unsafe.Sizeof(exem.FileHeader)
70+
if is64bit := exem.Magic == macho.Magic64; is64bit {
71+
// mach_header_64 has one extra uint32.
72+
cmdOffset += unsafe.Sizeof(exem.Magic)
73+
}
74+
if _, err := outf.Seek(int64(cmdOffset), 0); err != nil {
75+
return err
76+
}
77+
78+
// Read the load commands, looking for the LC_UUID cmd. If/when we
79+
// locate it, overwrite it with a new value produced by
80+
// uuidFromGoBuildId.
81+
reader := loadCmdReader{next: int64(cmdOffset),
82+
f: outf, order: exem.ByteOrder}
83+
for i := uint32(0); i < exem.Ncmd; i++ {
84+
cmd, err := reader.Next()
85+
if err != nil {
86+
return err
87+
}
88+
if cmd.Cmd == LC_UUID {
89+
var u uuidCmd
90+
if err := reader.ReadAt(0, &u); err != nil {
91+
return err
92+
}
93+
copy(u.Uuid[:], uuidFromGoBuildId(*flagBuildid))
94+
if err := reader.WriteAt(0, &u); err != nil {
95+
return err
96+
}
97+
break
98+
}
99+
}
100+
101+
// We're done
102+
return nil
103+
}

0 commit comments

Comments
 (0)