Skip to content

Commit a28605c

Browse files
committed
std.zig.system: use both PATH and hardcoded locations to find env
Should help systems that have main `env` binary in different location than hardcoded `/usr/bin/env` **during build** (not necessarily always), like Nix/Guix, Termux, Gentoo Prefix etc. Related: https://www.github.com/ziglang/zig/issues/12156 https://www.github.com/ziglang/zig/issues/14146 https://www.github.com/ziglang/zig/issues/14577 https://www.github.com/ziglang/zig/issues/15898 Source for logic: https://www.github.com/ziglang/zig/issues/14146#issuecomment-2308984936 Signed-off-by: Eric Joldasov <[email protected]>
1 parent 25a9e01 commit a28605c

File tree

1 file changed

+139
-107
lines changed

1 file changed

+139
-107
lines changed

lib/std/zig/system.zig

Lines changed: 139 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,106 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
932932
return max_ver;
933933
}
934934

935+
/// Opens the file located at `start_path` and guesses whether it is a script
/// or an ELF file.
///
/// If the file starts with a "shebang line" ("#!"), it is considered a script and
/// the logic is re-run on the interpreter path referenced by that line. If the
/// interpreter is itself a script, the process repeats until a non-script file
/// is found.
///
/// If the file starts with the ELF magic sequence, it is considered an ELF file
/// and the opened file is returned; the caller owns it and must close it.
///
/// Only absolute paths are followed. A relative or empty path (for example one
/// built from a relative `PATH` entry, or a shebang line naming a relative
/// interpreter) is reported as `error.UnableToFindElfFile`, as is any failure to
/// open or read a file and any file that is neither an ELF file nor a script.
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
    var current_path = start_path;

    // According to `man 2 execve`:
    //
    // The kernel imposes a maximum length on the text
    // that follows the "#!" characters at the start of a script;
    // characters beyond the limit are ignored.
    // Before Linux 5.1, the limit is 127 characters.
    // Since Linux 5.1, the limit is 255 characters.
    //
    // Tests show that bash and zsh consider 255 as total limit,
    // *including* "#!" characters and ignoring newline.
    // For safety, we set max length as 255 + \n (1).
    var buffer: [255 + 1]u8 = undefined;
    while (true) {
        // Interpreter path can be relative on Linux, but for simplicity only
        // absolute paths are supported. The path originates from external input
        // (environment or file contents), so a relative or empty path is treated
        // as "not found" instead of being asserted on.
        if (!std.fs.path.isAbsolute(current_path)) return error.UnableToFindElfFile;

        // NOTE: after the first iteration `current_path` points into `buffer`.
        // That is fine because the file is opened here, before `buffer` is
        // overwritten by the read below.
        const file = cwd.openFile(current_path, .{}) catch |err| switch (err) {
            error.NoSpaceLeft => unreachable,
            error.NameTooLong => unreachable,
            error.PathAlreadyExists => unreachable,
            error.SharingViolation => unreachable,
            error.InvalidUtf8 => unreachable, // WASI only
            error.InvalidWtf8 => unreachable, // Windows only
            error.BadPathName => unreachable,
            error.PipeBusy => unreachable,
            error.FileLocksNotSupported => unreachable,
            error.WouldBlock => unreachable,
            error.FileBusy => unreachable, // opened without write permissions
            error.AntivirusInterference => unreachable, // Windows-only error

            error.IsDir,
            error.NotDir,

            error.AccessDenied,
            error.DeviceBusy,
            error.FileTooBig,
            error.SymLinkLoop,
            error.ProcessFdQuotaExceeded,
            error.SystemFdQuotaExceeded,
            error.SystemResources,

            error.FileNotFound,
            error.NetworkNotFound,
            error.NoDevice,
            error.Unexpected,
            => return error.UnableToFindElfFile,
        };
        // The file is handed over to the caller only when it is an ELF file;
        // in every other case (error return or next iteration) it is closed here.
        var is_elf_file = false;
        defer if (is_elf_file == false) file.close();

        // Shortest working interpreter path is "#!/i" (4)
        // (interpreter is "/i", assuming all paths are absolute, like in above comment).
        // ELF magic number length is also 4.
        //
        // If file is shorter than that, it is definitely not ELF file
        // nor file with "shebang" line.
        const min_len = 4;

        const len = preadAtLeast(file, &buffer, 0, min_len) catch return error.UnableToFindElfFile;
        const content = buffer[0..len];

        if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
            // It is very likely ELF file!
            is_elf_file = true;
            return file;
        } else if (mem.eql(u8, content[0..2], "#!")) {
            // We detected shebang, now parse the first line only.
            // The buffer usually also holds the beginning of the script body,
            // which must not leak into the interpreter path.
            const after_marker = content[2..];
            const line_end = mem.indexOfScalar(u8, after_marker, '\n') orelse after_marker.len;

            // Trim leading spaces and tabs that follow "#!".
            const path_maybe_args = mem.trimLeft(u8, after_marker[0..line_end], &.{ ' ', '\t' });

            // This line can have:
            // * Interpreter path only,
            // * Interpreter path and arguments, all separated by space, tab or NUL character.
            // Separate path and args.
            const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;

            current_path = path_maybe_args[0..path_end];
            continue;
        } else {
            // Not an ELF file, not a shell script with "shebang line", invalid duck.
            return error.UnableToFindElfFile;
        }
    }
}
1034+
9351035
/// In the past, this function attempted to use the executable's own binary if it was dynamically
9361036
/// linked to answer both the C ABI question and the dynamic linker question. However, this
9371037
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
@@ -940,11 +1040,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
9401040
/// the dynamic linker will match that of the compiler binary. Executables with these versions
9411041
/// mismatching will fail to run.
9421042
///
943-
/// Therefore, this function works the same regardless of whether the compiler binary is
944-
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
945-
/// answer to these questions, or if there is a shebang line, then it chases the referenced
946-
/// file recursively. If that does not provide the answer, then the function falls back to
947-
/// defaults.
1043+
/// Therefore, this function now does not inspect the executable's own binary.
1044+
/// Instead, it tries to find `env` program in PATH or in hardcoded location, and uses it
1045+
/// to find suitable ELF file. If `env` program is an executable, work is done and function starts to
1046+
/// inspect inner structure of a file. But if `env` is a script or other non-ELF file, it uses
1047+
/// interpreter path instead and tries to search ELF file again, going recursively in case interpreter
1048+
/// is also a script/non-ELF file.
1049+
///
1050+
/// If nothing was found, then the function falls back to defaults.
9481051
fn detectAbiAndDynamicLinker(
9491052
cpu: Target.Cpu,
9501053
os: Target.Os,
@@ -1004,112 +1107,44 @@ fn detectAbiAndDynamicLinker(
10041107
}
10051108
const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
10061109

1007-
// Best case scenario: the executable is dynamically linked, and we can iterate
1008-
// over our own shared objects and find a dynamic linker.
1009-
const elf_file = elf_file: {
1010-
// This block looks for a shebang line in /usr/bin/env,
1011-
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
1012-
// doing the same logic recursively in case it finds another shebang line.
1110+
const cwd = std.fs.cwd();
1111+
1112+
// Algorithm is:
1113+
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
1114+
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
1115+
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
1116+
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
1117+
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
1118+
const elf_file = (try_path: {
1119+
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
1120+
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
1121+
1122+
var buf: [std.fs.max_path_bytes + 1]u8 = undefined;
1123+
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
1124+
const allocator = fbs.allocator();
1125+
1126+
while (it.next()) |path| : (fbs.reset()) {
1127+
const start_path = std.fs.path.joinZ(allocator, &.{ path, "env" }) catch |err| switch (err) {
1128+
error.OutOfMemory => continue,
1129+
};
10131130

1014-
var file_name: []const u8 = switch (os.tag) {
1131+
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
1132+
error.UnableToFindElfFile => continue,
1133+
};
1134+
} else break :try_path null;
1135+
} orelse try_hardcoded: {
1136+
const hardcoded_file_name = switch (os.tag) {
10151137
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
10161138
// reasonably reliable path to start with.
10171139
else => "/usr/bin/env",
10181140
// Haiku does not have a /usr root directory.
10191141
.haiku => "/bin/env",
10201142
};
10211143

1022-
// According to `man 2 execve`:
1023-
//
1024-
// The kernel imposes a maximum length on the text
1025-
// that follows the "#!" characters at the start of a script;
1026-
// characters beyond the limit are ignored.
1027-
// Before Linux 5.1, the limit is 127 characters.
1028-
// Since Linux 5.1, the limit is 255 characters.
1029-
//
1030-
// Tests show that bash and zsh consider 255 as total limit,
1031-
// *including* "#!" characters and ignoring newline.
1032-
// For safety, we set max length as 255 + \n (1).
1033-
var buffer: [255 + 1]u8 = undefined;
1034-
while (true) {
1035-
// Interpreter path can be relative on Linux, but
1036-
// for simplicity we are asserting it is an absolute path.
1037-
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
1038-
error.NoSpaceLeft => unreachable,
1039-
error.NameTooLong => unreachable,
1040-
error.PathAlreadyExists => unreachable,
1041-
error.SharingViolation => unreachable,
1042-
error.InvalidUtf8 => unreachable, // WASI only
1043-
error.InvalidWtf8 => unreachable, // Windows only
1044-
error.BadPathName => unreachable,
1045-
error.PipeBusy => unreachable,
1046-
error.FileLocksNotSupported => unreachable,
1047-
error.WouldBlock => unreachable,
1048-
error.FileBusy => unreachable, // opened without write permissions
1049-
error.AntivirusInterference => unreachable, // Windows-only error
1050-
1051-
error.IsDir,
1052-
error.NotDir,
1053-
error.AccessDenied,
1054-
error.NoDevice,
1055-
error.FileNotFound,
1056-
error.NetworkNotFound,
1057-
error.FileTooBig,
1058-
error.Unexpected,
1059-
=> |e| {
1060-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1061-
return defaultAbiAndDynamicLinker(cpu, os, query);
1062-
},
1063-
1064-
else => |e| return e,
1065-
};
1066-
var is_elf_file = false;
1067-
defer if (is_elf_file == false) file.close();
1068-
1069-
// Shortest working interpreter path is "#!/i" (4)
1070-
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
1071-
// ELF magic number length is also 4.
1072-
//
1073-
// If file is shorter than that, it is definitely not ELF file
1074-
// nor file with "shebang" line.
1075-
const min_len: usize = 4;
1076-
1077-
const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
1078-
error.UnexpectedEndOfFile,
1079-
error.UnableToReadElfFile,
1080-
=> return defaultAbiAndDynamicLinker(cpu, os, query),
1081-
1082-
else => |e| return e,
1083-
};
1084-
const content = buffer[0..len];
1085-
1086-
if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
1087-
// It is very likely ELF file!
1088-
is_elf_file = true;
1089-
break :elf_file file;
1090-
} else if (mem.eql(u8, content[0..2], "#!")) {
1091-
// We detected shebang, now parse entire line.
1092-
1093-
// Trim leading "#!", spaces and tabs.
1094-
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
1095-
1096-
// This line can have:
1097-
// * Interpreter path only,
1098-
// * Interpreter path and arguments, all separated by space, tab or NUL character.
1099-
// And optionally newline at the end.
1100-
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
1101-
1102-
// Separate path and args.
1103-
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
1104-
1105-
file_name = path_maybe_args[0..path_end];
1106-
continue;
1107-
} else {
1108-
// Not a ELF file, not a shell script with "shebang line", invalid duck.
1109-
return defaultAbiAndDynamicLinker(cpu, os, query);
1110-
}
1111-
}
1112-
};
1144+
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
1145+
error.UnableToFindElfFile => null,
1146+
};
1147+
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
11131148
defer elf_file.close();
11141149

11151150
// TODO: inline this function and combine the buffer we already read above to find
@@ -1132,10 +1167,7 @@ fn detectAbiAndDynamicLinker(
11321167
error.UnexpectedEndOfFile,
11331168
error.NameTooLong,
11341169
// Finally, we fall back on the standard path.
1135-
=> |e| {
1136-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1137-
return defaultAbiAndDynamicLinker(cpu, os, query);
1138-
},
1170+
=> defaultAbiAndDynamicLinker(cpu, os, query),
11391171
};
11401172
}
11411173

0 commit comments

Comments
 (0)