Skip to content

Commit fa1e05c

Browse files
committed
std.zig.system: use both PATH and hardcoded locations to find env
Should help systems that have the main `env` binary in a different location than the hardcoded `/usr/bin/env` **during build** (not necessarily always), like Nix/Guix, Termux, Gentoo Prefix etc. Related: https://www.github.com/ziglang/zig/issues/12156 https://www.github.com/ziglang/zig/issues/14146 https://www.github.com/ziglang/zig/issues/14577 https://www.github.com/ziglang/zig/issues/15898 Source for logic: https://www.github.com/ziglang/zig/issues/14146#issuecomment-2308984936 Signed-off-by: Eric Joldasov <[email protected]>
1 parent 4a62e0d commit fa1e05c

File tree

1 file changed

+139
-108
lines changed

1 file changed

+139
-108
lines changed

lib/std/zig/system.zig

Lines changed: 139 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,106 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
985985
return max_ver;
986986
}
987987

988+
/// Opens the file located at `start_path` and guesses whether it is a script
/// or an ELF file.
///
/// If the file starts with a "shebang line", it is considered a script, and the logic
/// is re-run using the interpreter referenced after the "#!" characters. If the
/// interpreter is itself a script, this repeats until a non-script file is found.
///
/// If the file starts with the ELF magic sequence, it is considered an ELF file and
/// is returned still open; the caller is responsible for closing it.
///
/// Returns `error.UnableToFindElfFile` when any file in the chain cannot be opened
/// or read, is neither an ELF file nor a shebang script, is referenced through a
/// non-absolute (or empty) path, or when the interpreter chain is too long.
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
    var current_path = start_path;

    // According to `man 2 execve`:
    //
    // The kernel imposes a maximum length on the text
    // that follows the "#!" characters at the start of a script;
    // characters beyond the limit are ignored.
    // Before Linux 5.1, the limit is 127 characters.
    // Since Linux 5.1, the limit is 255 characters.
    //
    // Tests show that bash and zsh consider 255 as total limit,
    // *including* "#!" characters and ignoring newline.
    // For safety, we set max length as 255 + \n (1).
    var buffer: [255 + 1]u8 = undefined;

    // Upper bound on how many interpreters are chased. Without it, a script whose
    // interpreter chain is cyclic (e.g. it names itself) would loop forever.
    const max_interpreter_hops = 32;

    var hops: usize = 0;
    while (hops < max_interpreter_hops) : (hops += 1) {
        // Interpreter path can be relative on Linux, but for simplicity only
        // absolute paths are supported. The path originates from PATH entries or
        // from file contents, so a relative or empty path is reported as an error
        // instead of being asserted on.
        if (!std.fs.path.isAbsolute(current_path)) return error.UnableToFindElfFile;

        const file = cwd.openFile(current_path, .{}) catch |err| switch (err) {
            error.NoSpaceLeft => unreachable,
            error.PathAlreadyExists => unreachable,
            error.SharingViolation => unreachable,
            error.InvalidUtf8 => unreachable, // WASI only
            error.InvalidWtf8 => unreachable, // Windows only
            error.BadPathName => unreachable,
            error.PipeBusy => unreachable,
            error.FileLocksNotSupported => unreachable,
            error.WouldBlock => unreachable,
            error.FileBusy => unreachable, // opened without write permissions
            error.AntivirusInterference => unreachable, // Windows-only error

            // A PATH entry may contain an over-long path or path component,
            // so this is a regular "not found" outcome rather than a bug.
            error.NameTooLong,

            error.IsDir,
            error.NotDir,

            error.AccessDenied,
            error.DeviceBusy,
            error.FileTooBig,
            error.SymLinkLoop,
            error.ProcessFdQuotaExceeded,
            error.SystemFdQuotaExceeded,
            error.SystemResources,

            error.FileNotFound,
            error.NetworkNotFound,
            error.NoDevice,
            error.Unexpected,
            => return error.UnableToFindElfFile,
        };
        // The handle is kept open only when it is handed to the caller.
        var is_elf_file = false;
        defer if (is_elf_file == false) file.close();

        // Shortest working interpreter path is "#!/i" (4)
        // (interpreter is "/i", since only absolute paths are supported).
        // ELF magic number length is also 4.
        //
        // If the file is shorter than that, it is definitely neither an ELF file
        // nor a file with a "shebang" line.
        const min_len = 4;

        const len = preadAtLeast(file, &buffer, 0, min_len) catch return error.UnableToFindElfFile;
        const content = buffer[0..len];

        if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
            // It is very likely an ELF file!
            is_elf_file = true;
            return file;
        } else if (mem.eql(u8, content[0..2], "#!")) {
            // We detected a shebang, now parse the first line only. The buffer may
            // also hold the beginning of the script body, which must not leak into
            // the interpreter path.
            const line_end = mem.indexOfScalar(u8, content, '\n') orelse content.len;

            // Skip leading "#!", then spaces and tabs.
            const path_maybe_args = mem.trimLeft(u8, content[2..line_end], &.{ ' ', '\t' });

            // This line can have:
            // * Interpreter path only,
            // * Interpreter path and arguments, all separated by space, tab or NUL character.
            const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;

            // The slice points into `buffer`; it is consumed by `openFile` at the top
            // of the next iteration, before the buffer is overwritten again.
            current_path = path_maybe_args[0..path_end];
            continue;
        } else {
            // Not an ELF file, not a script with a "shebang line", invalid duck.
            return error.UnableToFindElfFile;
        }
    }

    // Interpreter chain is too long or cyclic.
    return error.UnableToFindElfFile;
}
1087+
9881088
/// In the past, this function attempted to use the executable's own binary if it was dynamically
9891089
/// linked to answer both the C ABI question and the dynamic linker question. However, this
9901090
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
@@ -993,11 +1093,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
9931093
/// the dynamic linker will match that of the compiler binary. Executables with these versions
9941094
/// mismatching will fail to run.
9951095
///
996-
/// Therefore, this function works the same regardless of whether the compiler binary is
997-
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
998-
/// answer to these questions, or if there is a shebang line, then it chases the referenced
999-
/// file recursively. If that does not provide the answer, then the function falls back to
1000-
/// defaults.
1096+
/// Therefore, this function now does not inspect the executable's own binary.
1097+
/// Instead, it tries to find the `env` program in PATH or in a hardcoded location, and uses it
1098+
/// to find a suitable ELF file. If `env` is itself an ELF file, the search is done and the function starts to
1099+
/// inspect the inner structure of that file. But if `env` is a script with a shebang line, it uses the
1100+
/// interpreter path instead and searches for an ELF file again, recursing in case the interpreter
1101+
/// is also a script.
1102+
///
1103+
/// If nothing was found, then the function falls back to defaults.
10011104
fn detectAbiAndDynamicLinker(
10021105
cpu: Target.Cpu,
10031106
os: Target.Os,
@@ -1065,113 +1168,44 @@ fn detectAbiAndDynamicLinker(
10651168

10661169
const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
10671170

1068-
// Best case scenario: the executable is dynamically linked, and we can iterate
1069-
// over our own shared objects and find a dynamic linker.
1070-
const elf_file = elf_file: {
1071-
// This block looks for a shebang line in /usr/bin/env,
1072-
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
1073-
// doing the same logic recursively in case it finds another shebang line.
1171+
const cwd = std.fs.cwd();
1172+
1173+
// Algorithm is:
1174+
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
1175+
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
1176+
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
1177+
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
1178+
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
1179+
const elf_file = (try_path: {
1180+
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
1181+
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
1182+
1183+
var buf: [std.fs.max_path_bytes + 1]u8 = undefined;
1184+
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
1185+
const allocator = fbs.allocator();
1186+
1187+
while (it.next()) |path| : (fbs.reset()) {
1188+
const start_path = std.fs.path.joinZ(allocator, &.{ path, "env" }) catch |err| switch (err) {
1189+
error.OutOfMemory => continue,
1190+
};
10741191

1075-
var file_name: []const u8 = switch (os.tag) {
1192+
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
1193+
error.UnableToFindElfFile => continue,
1194+
};
1195+
} else break :try_path null;
1196+
} orelse try_hardcoded: {
1197+
const hardcoded_file_name = switch (os.tag) {
10761198
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
10771199
// reasonably reliable path to start with.
10781200
else => "/usr/bin/env",
10791201
// Haiku does not have a /usr root directory.
10801202
.haiku => "/bin/env",
10811203
};
10821204

1083-
// According to `man 2 execve`:
1084-
//
1085-
// The kernel imposes a maximum length on the text
1086-
// that follows the "#!" characters at the start of a script;
1087-
// characters beyond the limit are ignored.
1088-
// Before Linux 5.1, the limit is 127 characters.
1089-
// Since Linux 5.1, the limit is 255 characters.
1090-
//
1091-
// Tests show that bash and zsh consider 255 as total limit,
1092-
// *including* "#!" characters and ignoring newline.
1093-
// For safety, we set max length as 255 + \n (1).
1094-
var buffer: [255 + 1]u8 = undefined;
1095-
while (true) {
1096-
// Interpreter path can be relative on Linux, but
1097-
// for simplicity we are asserting it is an absolute path.
1098-
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
1099-
error.NoSpaceLeft => unreachable,
1100-
error.NameTooLong => unreachable,
1101-
error.PathAlreadyExists => unreachable,
1102-
error.SharingViolation => unreachable,
1103-
error.InvalidUtf8 => unreachable, // WASI only
1104-
error.InvalidWtf8 => unreachable, // Windows only
1105-
error.BadPathName => unreachable,
1106-
error.PipeBusy => unreachable,
1107-
error.FileLocksNotSupported => unreachable,
1108-
error.WouldBlock => unreachable,
1109-
error.FileBusy => unreachable, // opened without write permissions
1110-
error.AntivirusInterference => unreachable, // Windows-only error
1111-
1112-
error.IsDir,
1113-
error.NotDir,
1114-
error.AccessDenied,
1115-
error.NoDevice,
1116-
error.FileNotFound,
1117-
error.NetworkNotFound,
1118-
error.FileTooBig,
1119-
error.Unexpected,
1120-
=> |e| {
1121-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1122-
return defaultAbiAndDynamicLinker(cpu, os, query);
1123-
},
1124-
1125-
else => |e| return e,
1126-
};
1127-
var is_elf_file = false;
1128-
defer if (is_elf_file == false) file.close();
1129-
1130-
// Shortest working interpreter path is "#!/i" (4)
1131-
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
1132-
// ELF magic number length is also 4.
1133-
//
1134-
// If file is shorter than that, it is definitely not ELF file
1135-
// nor file with "shebang" line.
1136-
const min_len: usize = 4;
1137-
1138-
const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
1139-
error.UnexpectedEndOfFile,
1140-
error.UnableToReadElfFile,
1141-
error.ProcessNotFound,
1142-
=> return defaultAbiAndDynamicLinker(cpu, os, query),
1143-
1144-
else => |e| return e,
1145-
};
1146-
const content = buffer[0..len];
1147-
1148-
if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
1149-
// It is very likely ELF file!
1150-
is_elf_file = true;
1151-
break :elf_file file;
1152-
} else if (mem.eql(u8, content[0..2], "#!")) {
1153-
// We detected shebang, now parse entire line.
1154-
1155-
// Trim leading "#!", spaces and tabs.
1156-
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
1157-
1158-
// This line can have:
1159-
// * Interpreter path only,
1160-
// * Interpreter path and arguments, all separated by space, tab or NUL character.
1161-
// And optionally newline at the end.
1162-
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
1163-
1164-
// Separate path and args.
1165-
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
1166-
1167-
file_name = path_maybe_args[0..path_end];
1168-
continue;
1169-
} else {
1170-
// Not a ELF file, not a shell script with "shebang line", invalid duck.
1171-
return defaultAbiAndDynamicLinker(cpu, os, query);
1172-
}
1173-
}
1174-
};
1205+
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
1206+
error.UnableToFindElfFile => null,
1207+
};
1208+
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
11751209
defer elf_file.close();
11761210

11771211
// TODO: inline this function and combine the buffer we already read above to find
@@ -1195,10 +1229,7 @@ fn detectAbiAndDynamicLinker(
11951229
error.UnexpectedEndOfFile,
11961230
error.NameTooLong,
11971231
// Finally, we fall back on the standard path.
1198-
=> |e| {
1199-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1200-
return defaultAbiAndDynamicLinker(cpu, os, query);
1201-
},
1232+
=> defaultAbiAndDynamicLinker(cpu, os, query),
12021233
};
12031234
}
12041235

0 commit comments

Comments
 (0)