-
-
Notifications
You must be signed in to change notification settings - Fork 2.8k
std: add MultiArrayListUnmanaged #7854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
What do you think about this alternate implementation that uses only 1 allocation for all the fields? (It passes the test case) // SPDX-License-Identifier: MIT
// Copyright (c) 2015-2021 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
const std = @import("std.zig");
const assert = std.debug.assert;
const meta = std.meta;
const mem = std.mem;
const Allocator = mem.Allocator;
pub fn MultiArrayList(comptime S: type) type {
return struct {
const Self = @This();
pub const Field = meta.FieldEnum(S);
bytes: [*]align(@alignOf(S)) u8 = undefined,
len: usize = 0,
capacity: usize = 0,
const fields = meta.fields(S);
/// `sizes.bytes` is an array of @sizeOf each S field. Sorted by alignment, descending.
/// `sizes.indexes` is an array mapping from field to its index in the `sizes.bytes` array.
const sizes = blk: {
const Data = struct {
size: usize,
size_index: usize,
alignment: usize,
};
var data: [fields.len]Data = undefined;
for (fields) |field_info, i| {
data[i] = .{
.size = @sizeOf(field_info.field_type),
.size_index = i,
.alignment = field_info.alignment,
};
}
const Sort = struct {
fn lessThan(trash: *i32, lhs: Data, rhs: Data) bool {
return lhs.alignment >= rhs.alignment;
}
};
var trash: i32 = undefined; // workaround for stage1 compiler bug
std.sort.sort(Data, &data, &trash, Sort.lessThan);
var sizes_bytes: [fields.len]usize = undefined;
var sizes_indexes: [fields.len]usize = undefined;
for (data) |elem, i| {
sizes_bytes[i] = elem.size;
sizes_indexes[elem.size_index] = i;
}
break :blk .{
.bytes = sizes_bytes,
.indexes = sizes_indexes,
};
};
/// Release all allocated memory.
pub fn deinit(self: *Self, gpa: *Allocator) void {
gpa.free(self.allocatedBytes());
self.* = undefined;
}
pub fn items(self: *Self, comptime field: Field) []FieldType(field) {
const T = FieldType(field);
var offset: usize = 0;
const size_index = sizes.indexes[@enumToInt(field)];
for (sizes.bytes[0..size_index]) |field_size| {
offset += field_size * self.capacity;
}
const ptr = @ptrCast([*]T, @alignCast(@alignOf(T), self.bytes + offset));
return ptr[0..self.len];
}
/// Extend the list by 1 element. Allocates more memory as necessary.
pub fn append(self: *Self, gpa: *Allocator, elem: S) !void {
try self.ensureCapacity(gpa, self.len + 1);
self.appendAssumeCapacity(elem);
}
/// Extend the list by 1 element, but asserting `self.capacity`
/// is sufficient to hold an additional item.
pub fn appendAssumeCapacity(self: *Self, elem: S) void {
assert(self.len < self.capacity);
self.len += 1;
inline for (fields) |field_info, i| {
self.items(@intToEnum(Field, i))[self.len - 1] = @field(elem, field_info.name);
}
}
/// Adjust the list's length to `new_len`.
/// Does not initialize added items, if any.
pub fn resize(self: *Self, gpa: *Allocator, new_len: usize) !void {
try self.ensureCapacity(gpa, new_len);
self.items.len = new_len;
}
/// Attempt to reduce allocated capacity to `new_len`.
/// If `new_len` is greater than zero, this may fail to reduce the capacity,
/// but the data remains intact and the length is updated to new_len.
pub fn shrinkAndFree(self: *Self, gpa: *Allocator, new_len: usize) void {
if (new_len == 0) {
gpa.free(self.allocatedBytes());
self.* = .{};
return;
}
assert(new_len <= self.capacity);
assert(new_len <= self.len);
var other = Self{};
other.setCapacity(gpa, new_len) catch {
self.len = new_len;
// TODO memset the invalidated items to undefined
return;
};
other.len = new_len;
inline for (fields) |field_info, i| {
const field = @intToEnum(Field, i);
mem.copy(field_info.field_type, other.items(field), self.items(field));
}
gpa.free(self.allocatedBytes());
self.* = other;
}
/// Reduce length to `new_len`.
/// Invalidates pointers to elements `items[new_len..]`.
/// Keeps capacity the same.
pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
self.len = new_len;
}
/// Modify the array so that it can hold at least `new_capacity` items.
/// Implements super-linear growth to achieve amortized O(1) append operations.
/// Invalidates pointers if additional memory is needed.
pub fn ensureCapacity(self: *Self, gpa: *Allocator, new_capacity: usize) !void {
var better_capacity = self.capacity;
if (better_capacity >= new_capacity) return;
while (true) {
better_capacity += better_capacity / 2 + 8;
if (better_capacity >= new_capacity) break;
}
return self.setCapacity(gpa, better_capacity);
}
/// Modify the array so that it can hold exactly `new_capacity` items.
/// Invalidates pointers if additional memory is needed.
pub fn setCapacity(self: *Self, gpa: *Allocator, new_capacity: usize) !void {
const bytes = try gpa.reallocAdvanced(
self.allocatedBytes(),
@alignOf(S),
capacityInBytes(new_capacity),
.exact,
);
self.bytes = bytes.ptr;
self.capacity = new_capacity;
}
fn capacityInBytes(capacity: usize) usize {
// TODO this looks like a job for SIMD (need that @reduce builtin tho)
var bytes_count: usize = 0;
for (sizes.bytes) |field_size| {
bytes_count += capacity * field_size;
}
return bytes_count;
}
fn allocatedBytes(self: Self) []align(@alignOf(S)) u8 {
return self.bytes[0..capacityInBytes(self.capacity)];
}
fn FieldType(field: Field) type {
return meta.fieldInfo(S, field).field_type;
}
};
}
test "basic usage" {
const testing = std.testing;
const ally = testing.allocator;
const Foo = struct {
a: u32,
b: []const u8,
c: u8,
};
var list = MultiArrayList(Foo){};
defer list.deinit(ally);
try list.ensureCapacity(ally, 2);
list.appendAssumeCapacity(.{
.a = 1,
.b = "foobar",
.c = 'a',
});
list.appendAssumeCapacity(.{
.a = 2,
.b = "zigzag",
.c = 'b',
});
testing.expectEqualSlices(u32, list.items(.a), &[_]u32{ 1, 2 });
testing.expectEqualSlices(u8, list.items(.c), &[_]u8{ 'a', 'b' });
testing.expectEqual(@as(usize, 2), list.items(.b).len);
testing.expectEqualStrings("foobar", list.items(.b)[0]);
testing.expectEqualStrings("zigzag", list.items(.b)[1]);
try list.append(ally, .{
.a = 3,
.b = "fizzbuzz",
.c = 'c',
});
testing.expectEqualSlices(u32, list.items(.a), &[_]u32{ 1, 2, 3 });
testing.expectEqualSlices(u8, list.items(.c), &[_]u8{ 'a', 'b', 'c' });
testing.expectEqual(@as(usize, 3), list.items(.b).len);
testing.expectEqualStrings("foobar", list.items(.b)[0]);
testing.expectEqualStrings("zigzag", list.items(.b)[1]);
testing.expectEqualStrings("fizzbuzz", list.items(.b)[2]);
} |
@ifreund you might want to ping me on IRC before you do any more work on this because I started adding more functionality to my fork of this and using it locally and I think you might like it |
Also known as "Struct-Of-Arrays" or "SOA". The purpose of this data structure is to provide a similar API to ArrayList but instead of the element type being a struct, the fields of the struct are in N different arrays, all with the same length and capacity. Having this abstraction means we can put them in the same allocation, avoiding overhead with the allocator. It also saves a tiny bit of overhead from the redundant capacity and length fields, since each struct element shares the same value. This is an alternate implementation to #7854.
058f687
to
9eb7f4f
Compare
9eb7f4f
to
d9c1ce4
Compare
I think the next step here is to swap out the master branch implementation with this one, and run some benchmarks to see which one has better perf / peak memory usage. The main difference is single allocation vs one allocation per field. Depending on the results of those benchmarks, we will know which implementation to keep. |
Also known as "Struct-Of-Arrays" or "SOA". The purpose of this data structure is to provide a similar API to ArrayList but instead of the element type being a struct, the fields of the struct are in N different arrays, all with the same length and capacity. Having this abstraction means we can put them in the same allocation, avoiding overhead with the allocator. It also saves a tiny bit of overhead from the redundant capacity and length fields, since each struct element shares the same value. This is an alternate implementation to ziglang#7854.
Extracted into #8415. |
This was requested on IRC by @andrewrk for use in the self hosted compiler. This is a draft PR as I'm not 100% settled on the API yet, which could also use expansion to support more operations similar to ArrayList.