Skip to content

std.zig.system: use both PATH and hardcoded locations to find env #21540

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions lib/std/fs/path.zig
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,7 @@ fn joinSepMaybeZ(allocator: Allocator, separator: u8, comptime sepPredicate: fn
var sum: usize = paths[first_path_index].len;
var prev_path = paths[first_path_index];
assert(prev_path.len > 0);
var i: usize = first_path_index + 1;
while (i < paths.len) : (i += 1) {
const this_path = paths[i];
for (paths[first_path_index + 1 ..]) |this_path| {
if (this_path.len == 0) continue;
const prev_sep = sepPredicate(prev_path[prev_path.len - 1]);
const this_sep = sepPredicate(this_path[0]);
Expand All @@ -112,9 +110,7 @@ fn joinSepMaybeZ(allocator: Allocator, separator: u8, comptime sepPredicate: fn
var buf_index: usize = paths[first_path_index].len;
var prev_path = paths[first_path_index];
assert(prev_path.len > 0);
var i: usize = first_path_index + 1;
while (i < paths.len) : (i += 1) {
const this_path = paths[i];
for (paths[first_path_index + 1 ..]) |this_path| {
if (this_path.len == 0) continue;
const prev_sep = sepPredicate(prev_path[prev_path.len - 1]);
const this_sep = sepPredicate(this_path[0]);
Expand Down
247 changes: 139 additions & 108 deletions lib/std/zig/system.zig
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,106 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
return max_ver;
}

/// This functions tries to open file located at `start_path`, and then guesses
/// whether it is a script or an ELF file.
///
/// If it finds "shebang line", file is considered a script, and logic is re-run
/// using interpreter referenced after "#!" symbols. If interpreter is itself also a script,
/// logic becomes recursive until non-script file is found.
///
/// If it finds ELF magic sequence, file is considered an ELF file and function returns.
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
var current_path = start_path;

// According to `man 2 execve`:
//
// The kernel imposes a maximum length on the text
// that follows the "#!" characters at the start of a script;
// characters beyond the limit are ignored.
// Before Linux 5.1, the limit is 127 characters.
// Since Linux 5.1, the limit is 255 characters.
//
// Tests show that bash and zsh consider 255 as total limit,
// *including* "#!" characters and ignoring newline.
// For safety, we set max length as 255 + \n (1).
var buffer: [255 + 1]u8 = undefined;
while (true) {
// Interpreter path can be relative on Linux, but
// for simplicity we are asserting it is an absolute path.
assert(std.fs.path.isAbsolute(current_path));
const file = cwd.openFile(current_path, .{}) catch |err| switch (err) {
error.NoSpaceLeft => unreachable,
error.NameTooLong => unreachable,
error.PathAlreadyExists => unreachable,
error.SharingViolation => unreachable,
error.InvalidUtf8 => unreachable, // WASI only
error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable,
error.PipeBusy => unreachable,
error.FileLocksNotSupported => unreachable,
error.WouldBlock => unreachable,
error.FileBusy => unreachable, // opened without write permissions
error.AntivirusInterference => unreachable, // Windows-only error

error.IsDir,
error.NotDir,

error.AccessDenied,
error.DeviceBusy,
error.FileTooBig,
error.SymLinkLoop,
error.ProcessFdQuotaExceeded,
error.SystemFdQuotaExceeded,
error.SystemResources,

error.FileNotFound,
error.NetworkNotFound,
error.NoDevice,
error.Unexpected,
=> return error.UnableToFindElfFile,
};
var is_elf_file = false;
defer if (is_elf_file == false) file.close();

// Shortest working interpreter path is "#!/i" (4)
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
// ELF magic number length is also 4.
//
// If file is shorter than that, it is definitely not ELF file
// nor file with "shebang" line.
const min_len = 4;

const len = preadAtLeast(file, &buffer, 0, min_len) catch return error.UnableToFindElfFile;
const content = buffer[0..len];

if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
// It is very likely ELF file!
is_elf_file = true;
return file;
} else if (mem.eql(u8, content[0..2], "#!")) {
// We detected shebang, now parse entire line.

// Trim leading "#!", spaces and tabs.
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });

// This line can have:
// * Interpreter path only,
// * Interpreter path and arguments, all separated by space, tab or NUL character.
// And optionally newline at the end.
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");

// Separate path and args.
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;

current_path = path_maybe_args[0..path_end];
continue;
} else {
// Not a ELF file, not a shell script with "shebang line", invalid duck.
return error.UnableToFindElfFile;
}
}
}

/// In the past, this function attempted to use the executable's own binary if it was dynamically
/// linked to answer both the C ABI question and the dynamic linker question. However, this
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
Expand All @@ -1003,11 +1103,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
/// the dynamic linker will match that of the compiler binary. Executables with these versions
/// mismatching will fail to run.
///
/// Therefore, this function works the same regardless of whether the compiler binary is
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
/// answer to these questions, or if there is a shebang line, then it chases the referenced
/// file recursively. If that does not provide the answer, then the function falls back to
/// defaults.
/// Therefore, this function now does not inspect the executable's own binary.
/// Instead, it tries to find `env` program in PATH or in hardcoded location, and uses it
/// to find suitable ELF file. If `env` program is an executable, work is done and function starts to
/// inspect inner structure of a file. But if `env` is a script or other non-ELF file, it uses
/// interpreter path instead and tries to search ELF file again, going recursively in case interpreter
/// is also a script/non-ELF file.
///
/// If nothing was found, then the function falls back to defaults.
fn detectAbiAndDynamicLinker(
cpu: Target.Cpu,
os: Target.Os,
Expand Down Expand Up @@ -1075,113 +1178,44 @@ fn detectAbiAndDynamicLinker(

const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];

// Best case scenario: the executable is dynamically linked, and we can iterate
// over our own shared objects and find a dynamic linker.
const elf_file = elf_file: {
// This block looks for a shebang line in /usr/bin/env,
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
// doing the same logic recursively in case it finds another shebang line.
const cwd = std.fs.cwd();

// Algorithm is:
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
const elf_file = (try_path: {
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);

var buf: [std.fs.max_path_bytes + 1]u8 = undefined;
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
const allocator = fbs.allocator();

while (it.next()) |path| : (fbs.reset()) {
const start_path = std.fs.path.joinZ(allocator, &.{ path, "env" }) catch |err| switch (err) {
error.OutOfMemory => continue,
};

var file_name: []const u8 = switch (os.tag) {
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
error.UnableToFindElfFile => continue,
};
} else break :try_path null;
} orelse try_hardcoded: {
const hardcoded_file_name = switch (os.tag) {
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
// reasonably reliable path to start with.
else => "/usr/bin/env",
// Haiku does not have a /usr root directory.
.haiku => "/bin/env",
};

// According to `man 2 execve`:
//
// The kernel imposes a maximum length on the text
// that follows the "#!" characters at the start of a script;
// characters beyond the limit are ignored.
// Before Linux 5.1, the limit is 127 characters.
// Since Linux 5.1, the limit is 255 characters.
//
// Tests show that bash and zsh consider 255 as total limit,
// *including* "#!" characters and ignoring newline.
// For safety, we set max length as 255 + \n (1).
var buffer: [255 + 1]u8 = undefined;
while (true) {
// Interpreter path can be relative on Linux, but
// for simplicity we are asserting it is an absolute path.
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
error.NoSpaceLeft => unreachable,
error.NameTooLong => unreachable,
error.PathAlreadyExists => unreachable,
error.SharingViolation => unreachable,
error.InvalidUtf8 => unreachable, // WASI only
error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable,
error.PipeBusy => unreachable,
error.FileLocksNotSupported => unreachable,
error.WouldBlock => unreachable,
error.FileBusy => unreachable, // opened without write permissions
error.AntivirusInterference => unreachable, // Windows-only error

error.IsDir,
error.NotDir,
error.AccessDenied,
error.NoDevice,
error.FileNotFound,
error.NetworkNotFound,
error.FileTooBig,
error.Unexpected,
=> |e| {
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
return defaultAbiAndDynamicLinker(cpu, os, query);
},

else => |e| return e,
};
var is_elf_file = false;
defer if (is_elf_file == false) file.close();

// Shortest working interpreter path is "#!/i" (4)
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
// ELF magic number length is also 4.
//
// If file is shorter than that, it is definitely not ELF file
// nor file with "shebang" line.
const min_len: usize = 4;

const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
error.UnexpectedEndOfFile,
error.UnableToReadElfFile,
error.ProcessNotFound,
=> return defaultAbiAndDynamicLinker(cpu, os, query),

else => |e| return e,
};
const content = buffer[0..len];

if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
// It is very likely ELF file!
is_elf_file = true;
break :elf_file file;
} else if (mem.eql(u8, content[0..2], "#!")) {
// We detected shebang, now parse entire line.

// Trim leading "#!", spaces and tabs.
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });

// This line can have:
// * Interpreter path only,
// * Interpreter path and arguments, all separated by space, tab or NUL character.
// And optionally newline at the end.
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");

// Separate path and args.
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;

file_name = path_maybe_args[0..path_end];
continue;
} else {
// Not a ELF file, not a shell script with "shebang line", invalid duck.
return defaultAbiAndDynamicLinker(cpu, os, query);
}
}
};
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
error.UnableToFindElfFile => null,
};
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
defer elf_file.close();

// TODO: inline this function and combine the buffer we already read above to find
Expand All @@ -1205,10 +1239,7 @@ fn detectAbiAndDynamicLinker(
error.UnexpectedEndOfFile,
error.NameTooLong,
// Finally, we fall back on the standard path.
=> |e| {
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
return defaultAbiAndDynamicLinker(cpu, os, query);
},
=> defaultAbiAndDynamicLinker(cpu, os, query),
};
}

Expand Down
Loading