Description
Zig Version
On 0.9.0-dev.1444+e2a2e6c14
, though should apply to all versions of Zig with std
which include this interface.
Steps to Reproduce
To reproduce this error, an example using the current Random
interface, and a modified copy is provided. These can be benchmarked against each other using a tool such as hyperfine
to show the significant performance difference. (The while loop is used just to increase the number of times we run the function so that the time taken is easily measurable).
Example 1 (Current interface):
const std = @import("std");
pub fn main() !void {
var seed: u64 = undefined;
try std.os.getrandom(std.mem.asBytes(&seed));
var prng = std.rand.DefaultPrng.init(seed);
var rand = &prng.random;
var i: usize = 0;
var r: i32 = undefined;
while (i < 100000000) : (i += 1) {
r = rand.int(i32);
std.mem.doNotOptimizeAway(&r);
}
}
Example 2 (Modified interface):
const std = @import("std");
pub fn main() !void {
var seed: u64 = undefined;
try std.os.getrandom(std.mem.asBytes(&seed));
var prng = std.rand.DefaultPrng.init(seed);
var rand = Random.init(&prng);
var i: usize = 0;
var r: i32 = undefined;
while (i < 100000000) : (i += 1) {
r = rand.int(i32);
std.mem.doNotOptimizeAway(&r);
}
}
// Modified `Random` interface by @SpexGuy
const Random = struct {
self: *c_void,
fill: fn(*c_void, []u8) void,
pub fn init(ptr: anytype) Random {
const Ptr = @TypeOf(ptr);
const gen = struct {
fn fill_erased(erased: *c_void, bytes: []u8) void {
const alignment = @typeInfo(Ptr).Pointer.alignment;
const self = @ptrCast(Ptr, @alignCast(alignment, erased));
self.fill(bytes);
}
};
return .{
.self = ptr,
.fill = gen.fill_erased,
};
}
/// Returns a random int `i` such that `0 <= i <= maxInt(T)`.
/// `i` is evenly distributed.
pub fn int(self: Random, comptime T: type) T {
const bits = @typeInfo(T).Int.bits;
const UnsignedT = std.meta.Int(.unsigned, bits);
const ByteAlignedT = std.meta.Int(.unsigned, @divTrunc(bits + 7, 8) * 8);
var rand_bytes: [@sizeOf(ByteAlignedT)]u8 = undefined;
self.fill(self.self, rand_bytes[0..]);
// use LE instead of native endian for better portability maybe?
// TODO: endian portability is pointless if the underlying prng isn't endian portable.
// TODO: document the endian portability of this library.
const byte_aligned_result = std.mem.readIntSliceLittle(ByteAlignedT, &rand_bytes);
const unsigned_result = @truncate(UnsignedT, byte_aligned_result);
return @bitCast(T, unsigned_result);
}
};
// These functions below are modified from the `Xoshiro256` implementation
fn fill(self: *std.rand.Xoshiro256, buf: []u8) void {
var i: usize = 0;
const aligned_len = buf.len - (buf.len & 7);
// Complete 8 byte segments.
while (i < aligned_len) : (i += 8) {
var n = next(self);
comptime var j: usize = 0;
inline while (j < 8) : (j += 1) {
buf[i + j] = @truncate(u8, n);
n >>= 8;
}
}
// Remaining. (cuts the stream)
if (i != buf.len) {
var n = next(self);
while (i < buf.len) : (i += 1) {
buf[i] = @truncate(u8, n);
n >>= 8;
}
}
}
fn next(self: *std.rand.Xoshiro256) u64 {
const r = std.math.rotl(u64, self.s[0] +% self.s[3], 23) +% self.s[0];
const t = self.s[1] << 17;
self.s[2] ^= self.s[0];
self.s[3] ^= self.s[1];
self.s[1] ^= self.s[2];
self.s[0] ^= self.s[3];
self.s[2] ^= t;
self.s[3] = std.math.rotl(u64, self.s[3], 45);
return r;
}
The examples were built with the command zig build-exe -O ReleaseFast example1.zig && zig build-exe -O ReleaseFast example2.zig
, and benchmarked with the command hyperfine -w 10 ./example{1,2}
.
Behaviour
The poor performance of the current Random
interface has a significant impact on the performance of the random generators in the standard library. Example 2 performs significantly better than both example 1; on my desktop system, example 2 was measured to run at over 2 times the speed of example 1.
cc: @SpexGuy @cryptocode