Skip to content

Commit 0808d98

Browse files
committed
add std.MultiArrayList
Also known as "Struct-Of-Arrays" or "SOA". The purpose of this data structure is to provide a similar API to ArrayList but instead of the element type being a struct, the fields of the struct are in N different arrays, all with the same length and capacity. Having this abstraction means we can put them in the same allocation, avoiding overhead with the allocator. It also saves a tiny bit of overhead from the redundant capacity and length fields, since all N field arrays share a single length and a single capacity. This is an alternate implementation to #7854.
1 parent 881ecdc commit 0808d98

File tree

2 files changed

+354
-0
lines changed

2 files changed

+354
-0
lines changed

lib/std/multi_array_list.zig

Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
// SPDX-License-Identifier: MIT
2+
// Copyright (c) 2015-2021 Zig Contributors
3+
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
4+
// The MIT license requires this copyright notice to be included in all copies
5+
// and substantial portions of the software.
6+
const std = @import("std.zig");
7+
const assert = std.debug.assert;
8+
const meta = std.meta;
9+
const mem = std.mem;
10+
const Allocator = mem.Allocator;
11+
12+
pub fn MultiArrayList(comptime S: type) type {
13+
return struct {
14+
bytes: [*]align(@alignOf(S)) u8 = undefined,
15+
len: usize = 0,
16+
capacity: usize = 0,
17+
18+
pub const Elem = S;
19+
20+
pub const Field = meta.FieldEnum(S);
21+
22+
/// A view of the list with one pre-computed base pointer per field.
/// Obtained from `slice()` or `toOwnedSlice()`.
pub const Slice = struct {
    /// One base pointer per field of `S`, indexed by field index
    /// (`@enumToInt` of the `Field` tag), NOT by the alignment-sorted
    /// layout order used by `sizes.bytes`.
    ptrs: [fields.len][*]u8,
    len: usize,
    capacity: usize,

    /// Returns the first `len` values stored for `field` as a typed slice.
    pub fn items(self: Slice, comptime field: Field) []FieldType(field) {
        const F = FieldType(field);
        const byte_ptr = self.ptrs[@enumToInt(field)];
        const casted_ptr = @ptrCast([*]F, @alignCast(@alignOf(F), byte_ptr));
        return casted_ptr[0..self.len];
    }

    /// Rebuilds the `MultiArrayList` this view describes.
    /// Returns an empty list when `S` has no fields.
    pub fn toMultiArrayList(self: Slice) Self {
        if (self.ptrs.len == 0) {
            return .{};
        }
        // `sizes.fields[0]` is the field laid out first in memory, so its
        // pointer is also the start of the backing buffer.
        const unaligned_ptr = self.ptrs[sizes.fields[0]];
        const aligned_ptr = @alignCast(@alignOf(S), unaligned_ptr);
        const casted_ptr = @ptrCast([*]align(@alignOf(S)) u8, aligned_ptr);
        return .{
            .bytes = casted_ptr,
            .len = self.len,
            .capacity = self.capacity,
        };
    }

    /// Frees the memory this view refers to, via `MultiArrayList.deinit`,
    /// and leaves `self` undefined.
    pub fn deinit(self: *Slice, gpa: *Allocator) void {
        var other = self.toMultiArrayList();
        other.deinit(gpa);
        self.* = undefined;
    }
};
55+
56+
const Self = @This();
57+
58+
const fields = meta.fields(S);
59+
/// Comptime layout tables for the fields of `S`.
/// `sizes.bytes` is an array of @sizeOf each S field. Sorted by alignment, descending.
/// `sizes.indexes` is an array mapping from field to its index in the `sizes.bytes` array.
/// `sizes.fields` is an array with the field indexes of the `sizes.bytes` array.
const sizes = blk: {
    const Data = struct {
        size: usize,
        size_index: usize,
        alignment: usize,
    };
    var data: [fields.len]Data = undefined;
    for (fields) |field_info, i| {
        data[i] = .{
            .size = @sizeOf(field_info.field_type),
            .size_index = i,
            .alignment = field_info.alignment,
        };
    }
    const Sort = struct {
        // Must be a strict ordering: with `>=`, two fields of equal
        // alignment would each compare "less than" the other.
        fn lessThan(trash: *i32, lhs: Data, rhs: Data) bool {
            return lhs.alignment > rhs.alignment;
        }
    };
    var trash: i32 = undefined; // workaround for stage1 compiler bug
    std.sort.sort(Data, &data, &trash, Sort.lessThan);
    var sizes_bytes: [fields.len]usize = undefined;
    var sizes_indexes: [fields.len]usize = undefined;
    var field_indexes: [fields.len]usize = undefined;
    for (data) |elem, i| {
        sizes_bytes[i] = elem.size;
        sizes_indexes[elem.size_index] = i;
        field_indexes[i] = elem.size_index;
    }
    break :blk .{
        .bytes = sizes_bytes,
        .indexes = sizes_indexes,
        .fields = field_indexes,
    };
};
97+
98+
/// Frees the backing buffer through `gpa` and leaves `self` undefined.
pub fn deinit(self: *Self, gpa: *Allocator) void {
    const backing = self.allocatedBytes();
    gpa.free(backing);
    self.* = undefined;
}
103+
104+
/// Hands the current contents over to the caller as a `Slice` and resets
/// this list to its empty default state. The caller owns the returned memory.
pub fn toOwnedSlice(self: *Self) Slice {
    const owned = self.slice();
    self.* = Self{};
    return owned;
}
110+
111+
/// Computes the base pointer of every field array inside the backing
/// buffer and returns them together with the current length and capacity.
pub fn slice(self: Self) Slice {
    var view = Slice{
        .ptrs = undefined,
        .len = self.len,
        .capacity = self.capacity,
    };
    // Walk the fields in layout order; each array occupies
    // `size * capacity` bytes directly after the previous one.
    var cursor: [*]u8 = self.bytes;
    for (sizes.fields) |field_index, layout_pos| {
        view.ptrs[field_index] = cursor;
        cursor += sizes.bytes[layout_pos] * self.capacity;
    }
    return view;
}
124+
125+
/// Returns the values stored for `field` as a typed slice of length `len`.
/// Shorthand for `slice().items(field)`.
pub fn items(self: Self, comptime field: Field) []FieldType(field) {
    const view = self.slice();
    return view.items(field);
}
128+
129+
/// Stores every field of `elem` at position `index` of the matching field array.
pub fn set(self: *Self, index: usize, elem: S) void {
    const view = self.slice();
    inline for (fields) |info, field_index| {
        const tag = @intToEnum(Field, field_index);
        const column = view.items(tag);
        column[index] = @field(elem, info.name);
    }
}
136+
137+
/// Obtain all the data for one array element.
/// Reads position `index` of every field array and returns the values
/// assembled into a single `S`.
pub fn get(self: *Self, index: usize) S {
    const slices = self.slice();
    var result: S = undefined;
    inline for (fields) |field_info, i| {
        // Write into `result`; the original assigned to an undeclared `elem`.
        @field(result, field_info.name) = slices.items(@intToEnum(Field, i))[index];
    }
    return result;
}
146+
147+
/// Appends `elem` to the end of the list, growing the backing buffer
/// through `gpa` first when there is no spare capacity.
pub fn append(self: *Self, gpa: *Allocator, elem: S) !void {
    const needed = self.len + 1;
    try self.ensureCapacity(gpa, needed);
    self.appendAssumeCapacity(elem);
}
152+
153+
/// Appends `elem` without allocating.
/// Asserts that `self.capacity` already has room for one more item.
pub fn appendAssumeCapacity(self: *Self, elem: S) void {
    assert(self.len < self.capacity);
    const index = self.len;
    self.len = index + 1;
    self.set(index, elem);
}
160+
161+
/// Sets the list's length to `new_len`, reserving capacity for it first.
/// Items added by growing are left uninitialized.
pub fn resize(self: *Self, gpa: *Allocator, new_len: usize) !void {
    try self.ensureCapacity(gpa, new_len);
    self.len = new_len;
}
167+
168+
/// Truncates the list to `new_len` and tries to shrink the allocation to match.
/// With `new_len == 0` the buffer is freed and the list reset to empty.
/// Otherwise a buffer of exactly `new_len` capacity is requested; if that
/// allocation fails, the existing buffer and capacity are kept, the data
/// remains intact, and only the length is updated to `new_len`.
pub fn shrinkAndFree(self: *Self, gpa: *Allocator, new_len: usize) void {
    if (new_len == 0) {
        gpa.free(self.allocatedBytes());
        self.* = .{};
        return;
    }
    assert(new_len <= self.capacity);
    assert(new_len <= self.len);

    // Both outcomes below end with `len == new_len`, so truncate up front.
    self.len = new_len;

    const shrunk_bytes = gpa.allocAdvanced(
        u8,
        @alignOf(S),
        capacityInBytes(new_len),
        .exact,
    ) catch {
        // TODO memset the invalidated items to undefined
        return;
    };
    var shrunk = Self{
        .bytes = shrunk_bytes.ptr,
        .capacity = new_len,
        .len = new_len,
    };

    // Copy the surviving items field by field into the smaller buffer.
    const old_view = self.slice();
    const new_view = shrunk.slice();
    inline for (fields) |info, field_index| {
        const tag = @intToEnum(Field, field_index);
        mem.copy(info.field_type, new_view.items(tag), old_view.items(tag));
    }

    gpa.free(self.allocatedBytes());
    self.* = shrunk;
}
205+
206+
/// Sets the length to `new_len` without touching the allocation.
/// Pointers to elements `items[new_len..]` become invalid;
/// the capacity is unchanged.
pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
    self.len = new_len;
}
212+
213+
/// Guarantees room for at least `new_capacity` items.
/// Returns immediately when the current capacity already suffices;
/// otherwise grows by roughly 1.5x plus 8 per step until the request is met,
/// giving amortized O(1) appends. Invalidates pointers when it reallocates.
pub fn ensureCapacity(self: *Self, gpa: *Allocator, new_capacity: usize) !void {
    if (self.capacity >= new_capacity) return;

    var grown = self.capacity;
    while (grown < new_capacity) {
        grown += grown / 2 + 8;
    }

    return self.setCapacity(gpa, grown);
}
227+
228+
/// Modify the array so that it can hold exactly `new_capacity` items.
/// A new buffer is always allocated, so existing element pointers are
/// invalidated on success. On allocation failure the list is unchanged.
/// `new_capacity` must be greater or equal to `len`.
pub fn setCapacity(self: *Self, gpa: *Allocator, new_capacity: usize) !void {
    assert(new_capacity >= self.len);
    const new_bytes = try gpa.allocAdvanced(
        u8,
        @alignOf(S),
        capacityInBytes(new_capacity),
        .exact,
    );
    if (self.len == 0) {
        // Nothing to copy, but an earlier reservation may still be live
        // (capacity > 0 with no items). Release it so it is not leaked.
        gpa.free(self.allocatedBytes());
        self.bytes = new_bytes.ptr;
        self.capacity = new_capacity;
        return;
    }
    var other = Self{
        .bytes = new_bytes.ptr,
        .capacity = new_capacity,
        .len = self.len,
    };
    // Move the existing items field by field into the new buffer.
    const self_slice = self.slice();
    const other_slice = other.slice();
    inline for (fields) |field_info, i| {
        const field = @intToEnum(Field, i);
        mem.copy(field_info.field_type, other_slice.items(field), self_slice.items(field));
    }
    gpa.free(self.allocatedBytes());
    self.* = other;
}
258+
259+
/// Total number of bytes needed to store `capacity` items:
/// the sum over all fields of `@sizeOf(field) * capacity`.
fn capacityInBytes(capacity: usize) usize {
    const field_count = sizes.bytes.len;
    const per_item: std.meta.Vector(field_count, usize) = sizes.bytes;
    const counts = @splat(field_count, capacity);
    return @reduce(.Add, per_item * counts);
}
264+
265+
/// The backing buffer as a byte slice covering the full current capacity.
fn allocatedBytes(self: Self) []align(@alignOf(S)) u8 {
    const byte_count = capacityInBytes(self.capacity);
    return self.bytes[0..byte_count];
}
268+
269+
/// The declared type of the `S` field identified by `field`.
fn FieldType(field: Field) type {
    const info = meta.fieldInfo(S, field);
    return info.field_type;
}
272+
};
273+
}
274+
275+
// Exercises reserve, append (with and without growth) and shrinkAndFree.
// All `expect*` calls pass the expected value first, so a failure report
// labels expected and actual correctly.
test "basic usage" {
    const testing = std.testing;
    const ally = testing.allocator;

    const Foo = struct {
        a: u32,
        b: []const u8,
        c: u8,
    };

    var list = MultiArrayList(Foo){};
    defer list.deinit(ally);

    try list.ensureCapacity(ally, 2);

    list.appendAssumeCapacity(.{
        .a = 1,
        .b = "foobar",
        .c = 'a',
    });

    list.appendAssumeCapacity(.{
        .a = 2,
        .b = "zigzag",
        .c = 'b',
    });

    testing.expectEqualSlices(u32, &[_]u32{ 1, 2 }, list.items(.a));
    testing.expectEqualSlices(u8, &[_]u8{ 'a', 'b' }, list.items(.c));

    testing.expectEqual(@as(usize, 2), list.items(.b).len);
    testing.expectEqualStrings("foobar", list.items(.b)[0]);
    testing.expectEqualStrings("zigzag", list.items(.b)[1]);

    try list.append(ally, .{
        .a = 3,
        .b = "fizzbuzz",
        .c = 'c',
    });

    testing.expectEqualSlices(u32, &[_]u32{ 1, 2, 3 }, list.items(.a));
    testing.expectEqualSlices(u8, &[_]u8{ 'a', 'b', 'c' }, list.items(.c));

    testing.expectEqual(@as(usize, 3), list.items(.b).len);
    testing.expectEqualStrings("foobar", list.items(.b)[0]);
    testing.expectEqualStrings("zigzag", list.items(.b)[1]);
    testing.expectEqualStrings("fizzbuzz", list.items(.b)[2]);

    // Add 6 more things to force a capacity increase.
    var i: usize = 0;
    while (i < 6) : (i += 1) {
        try list.append(ally, .{
            .a = @intCast(u32, 4 + i),
            .b = "whatever",
            .c = @intCast(u8, 'd' + i),
        });
    }

    testing.expectEqualSlices(
        u32,
        &[_]u32{ 1, 2, 3, 4, 5, 6, 7, 8, 9 },
        list.items(.a),
    );
    testing.expectEqualSlices(
        u8,
        &[_]u8{ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i' },
        list.items(.c),
    );

    list.shrinkAndFree(ally, 3);

    testing.expectEqualSlices(u32, &[_]u32{ 1, 2, 3 }, list.items(.a));
    testing.expectEqualSlices(u8, &[_]u8{ 'a', 'b', 'c' }, list.items(.c));

    testing.expectEqual(@as(usize, 3), list.items(.b).len);
    testing.expectEqualStrings("foobar", list.items(.b)[0]);
    testing.expectEqualStrings("zigzag", list.items(.b)[1]);
    testing.expectEqualStrings("fizzbuzz", list.items(.b)[2]);
}

lib/std/std.zig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringM
2020
pub const DynLib = @import("dynamic_library.zig").DynLib;
2121
pub const HashMap = hash_map.HashMap;
2222
pub const HashMapUnmanaged = hash_map.HashMapUnmanaged;
23+
pub const MultiArrayList = @import("multi_array_list.zig").MultiArrayList;
2324
pub const PackedIntArray = @import("packed_int_array.zig").PackedIntArray;
2425
pub const PackedIntArrayEndian = @import("packed_int_array.zig").PackedIntArrayEndian;
2526
pub const PackedIntSlice = @import("packed_int_array.zig").PackedIntSlice;

0 commit comments

Comments
 (0)