Skip to content

Commit 4e26722

Browse files
committed
Auto merge of #45434 - gnzlbg:vecopt, r=<try>
[vec] growth-strategy optimization This commit introduces a growth-strategy optimization for `RawVec` (and indirectly `Vec` and `VecDeque`). It introduces a method `grow_by(capacity_increase)` that tells the `RawVec` by how much the user would like to increase its capacity (e.g. `1` on `vec.push(val)`). It then uses the following growth strategy: - If the `RawVec` is empty: it allocates at least 64 bytes. - If the `RawVec` is not empty: - it uses a growth-factor of 2 for small (<4096 bytes) and large (>4096*32 bytes) vectors, and 1.5 otherwise - it uses this growth factor to compute a suitable capacity - it takes the max between this capacity and the current capacity plus the desired capacity increase (e.g. by using the desired capacity increase of `self.cap` one can force this method to double the capacity) - it passes the result to the `usable_size` function of the allocator to obtain the max usable size. The commit also refactors the logic of `Vec`'s growth test into an `is_full` function, and uses `core::intrinsics::unlikely` on the result of both `Vec`'s and `VecDeque`'s test to indicate that growth is an `unlikely` event. The `grow_by` function is not `#[inline(never)]` but `#[inline] + #[cold]`. That is, the function can be inlined, but the function author expects it to not be called often. Combined with the `unlikely` annotation on the call site, the compiler should have enough information to decide when eliding the call is worth it.
2 parents 5481098 + 0963f11 commit 4e26722

File tree

3 files changed

+192
-86
lines changed

3 files changed

+192
-86
lines changed

src/liballoc/raw_vec.rs

Lines changed: 177 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,66 @@ impl<T, A: Alloc> RawVec<T, A> {
235235
}
236236
}
237237

238-
/// Doubles the size of the type's backing allocation. This is common enough
238+
/// Grows the vector capacity by `capacity_increase`.
239+
///
240+
/// It allows implementing amortized O(1) `push` on vector-like containers.
241+
///
242+
/// # Attributes
243+
///
244+
/// - `#[inline]`: LLVM is able to completely elide memory allocations in
245+
/// many cases if it can "see" where memory is allocated and freed.
246+
/// - `#[cold]`: calling this function is a "rare" event
247+
///
248+
#[inline]
249+
#[cold]
250+
pub fn grow_by(&mut self, capacity_increase: usize) {
251+
let elem_size = mem::size_of::<T>();
252+
assert!(elem_size != 0, "RawVecs of zero-sized types can't grow");
253+
254+
let (new_cap, uniq) = match self.current_layout() {
255+
Some(cur) => {
256+
// The invariant `elem_size * self.cap <= isize::MAX` is
257+
// maintained by `alloc_guard`; the alignment will never be too
258+
// large as to "not be satisfiable" (so we can use
259+
// `from_size_align_unchecked`).
260+
let new_cap = Self::suitable_capacity(self.cap, capacity_increase);
261+
let new_size = new_cap * elem_size;
262+
let new_layout = unsafe {
263+
Layout::from_size_align_unchecked(new_size, cur.align())
264+
};
265+
let (_, usable_size) = self.a.usable_size(&new_layout);
266+
let new_layout = unsafe {
267+
Layout::from_size_align_unchecked(usable_size, cur.align())
268+
};
269+
alloc_guard(usable_size);
270+
let ptr_res = unsafe { self.a.realloc(self.ptr.as_ptr() as *mut u8,
271+
cur,
272+
new_layout) };
273+
match ptr_res {
274+
Ok(ptr) => (new_cap, unsafe {
275+
Unique::new_unchecked(ptr as *mut T)
276+
}),
277+
Err(e) => self.a.oom(e),
278+
}
279+
}
280+
None => {
281+
let new_cap = Self::suitable_capacity(self.cap, capacity_increase);
282+
let align = mem::align_of::<T>();
283+
let new_size = new_cap * elem_size;
284+
let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) };
285+
let (_, new_cap) = self.a.usable_size(&new_layout);;
286+
alloc_guard(new_cap);
287+
match self.a.alloc_array::<T>(new_cap) {
288+
Ok(ptr) => (new_cap, ptr),
289+
Err(e) => self.a.oom(e),
290+
}
291+
}
292+
};
293+
self.ptr = uniq;
294+
self.cap = new_cap;
295+
}
296+
297+
/// Increases the size of the type's backing allocation. This is common enough
239298
/// to want to do that it's easiest to just have a dedicated method. Slightly
240299
/// more efficient logic can be provided for this than the general case.
241300
///
@@ -286,53 +345,79 @@ impl<T, A: Alloc> RawVec<T, A> {
286345
#[inline(never)]
287346
#[cold]
288347
pub fn double(&mut self) {
289-
unsafe {
290-
let elem_size = mem::size_of::<T>();
348+
let cap = self.cap;
349+
self.grow_by(cap)
350+
}
291351

292-
// since we set the capacity to usize::MAX when elem_size is
293-
// 0, getting to here necessarily means the RawVec is overfull.
294-
assert!(elem_size != 0, "capacity overflow");
295-
296-
let (new_cap, uniq) = match self.current_layout() {
297-
Some(cur) => {
298-
// Since we guarantee that we never allocate more than
299-
// isize::MAX bytes, `elem_size * self.cap <= isize::MAX` as
300-
// a precondition, so this can't overflow. Additionally the
301-
// alignment will never be too large as to "not be
302-
// satisfiable", so `Layout::from_size_align` will always
303-
// return `Some`.
304-
//
305-
// tl;dr; we bypass runtime checks due to dynamic assertions
306-
// in this module, allowing us to use
307-
// `from_size_align_unchecked`.
308-
let new_cap = 2 * self.cap;
309-
let new_size = new_cap * elem_size;
310-
let new_layout = Layout::from_size_align_unchecked(new_size, cur.align());
311-
alloc_guard(new_size);
312-
let ptr_res = self.a.realloc(self.ptr.as_ptr() as *mut u8,
313-
cur,
314-
new_layout);
315-
match ptr_res {
316-
Ok(ptr) => (new_cap, Unique::new_unchecked(ptr as *mut T)),
317-
Err(e) => self.a.oom(e),
318-
}
319-
}
320-
None => {
321-
// skip to 4 because tiny Vec's are dumb; but not if that
322-
// would cause overflow
323-
let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 };
324-
match self.a.alloc_array::<T>(new_cap) {
325-
Ok(ptr) => (new_cap, ptr),
326-
Err(e) => self.a.oom(e),
327-
}
328-
}
329-
};
330-
self.ptr = uniq;
331-
self.cap = new_cap;
332-
}
352+
/// Given a `current_capacity` and a desired `capacity_increase` returns a
353+
/// suitable capacity for the `RawVec` such that `suitable_capacity >=
354+
/// current_capacity + capacity_increase`.
355+
///
356+
/// # Panics
357+
///
358+
/// Panics on overflow if `current_capacity + capacity_increase >
359+
/// std::usize::MAX`.
360+
///
361+
///
362+
/// # Growth strategy
363+
///
364+
/// RawVec grows differently depending on:
365+
///
366+
/// - 1. initial size: grows from zero to at least 64 bytes;
367+
/// use `with_capacity` to avoid a growth from zero.
368+
///
369+
/// - 2. vector size:
370+
/// - small vectors (< 4096 bytes) and large vectors (> 4096 * 32 bytes)
371+
/// grow with a growth factor of 2x.
372+
/// - otherwise (medium-sized vectors) grow with a growth factor of 1.5x.
373+
///
374+
/// # Growth factor
375+
///
376+
/// Medium-sized vectors' growth-factor is chosen to allow reusing memory from
377+
/// previous allocations. Previously freed memory can be reused after
378+
///
379+
/// - 4 reallocations for a growth factor of 1.5x
380+
/// - 3 reallocations for a growth factor of 1.45x
381+
/// - 2 reallocations for a growth factor of 1.3x
382+
///
383+
/// Which one is better [is application
384+
/// dependent](https://stackoverflow.com/questions/1100311/
385+
/// what-is-the-ideal-growth-rate-for-a-dynamically-allocated-array),
386+
/// also some claim that [the golden ratio (1.618) is
387+
/// optimal](https://crntaylor.wordpress.com/2011/07/15/
388+
/// optimal-memory-reallocation-and-the-golden-ratio/).
389+
/// The trade-off is having to wait for many reallocations to be able to
390+
/// reuse old memory.
391+
///
392+
/// Note: a factor of 2x _never_ allows reusing previously-freed memory.
393+
///
394+
#[inline]
395+
fn suitable_capacity(current_capacity: usize, capacity_increase: usize) -> usize {
396+
let elem_size = mem::size_of::<T>();
397+
assert!(elem_size != 0, "RawVecs of zero-sized types can't grow");
398+
399+
// Computes the capacity from the `current_capacity` following the
400+
// growth-strategy:
401+
let growth_capacity = match current_capacity {
402+
// Empty vector => at least 64 bytes
403+
0 => (64 / elem_size).max(1),
404+
// Small and large vectors (< 4096 bytes, and > 4096 * 32 bytes):
405+
//
406+
// FIXME: jemalloc specific behavior, allocators should provide a
407+
// way to query the byte size of blocks that can grow inplace.
408+
//
409+
// jemalloc can never grow in place small blocks but blocks larger
410+
// than or equal to 4096 bytes can be expanded in place:
411+
c if c < 4096 / elem_size => 2 * c,
412+
c if c > 4096 * 32 / elem_size => 2 * c,
413+
// Medium sized vectors in the [4096, 4096 * 32] bytes range:
414+
c => (c * 3 + 1) / 2
415+
};
416+
417+
growth_capacity.max(current_capacity + capacity_increase)
333418
}
334419

335-
/// Attempts to double the size of the type's backing allocation in place. This is common
420+
/// Attempts to increase the size of the type's backing allocation in place. This is common
336421
/// enough to want to do that it's easiest to just have a dedicated method. Slightly
337422
/// more efficient logic can be provided for this than the general case.
338423
///
@@ -344,41 +429,54 @@ impl<T, A: Alloc> RawVec<T, A> {
344429
/// all `usize::MAX` slots in your imaginary buffer.
345430
/// * Panics on 32-bit platforms if the requested capacity exceeds
346431
/// `isize::MAX` bytes.
347-
#[inline(never)]
348-
#[cold]
349432
pub fn double_in_place(&mut self) -> bool {
350-
unsafe {
351-
let elem_size = mem::size_of::<T>();
352-
let old_layout = match self.current_layout() {
353-
Some(layout) => layout,
354-
None => return false, // nothing to double
355-
};
356-
357-
// since we set the capacity to usize::MAX when elem_size is
358-
// 0, getting to here necessarily means the RawVec is overfull.
359-
assert!(elem_size != 0, "capacity overflow");
433+
let capacity_increase = self.cap;
434+
self.grow_by_in_place(capacity_increase)
435+
}
360436

361-
// Since we guarantee that we never allocate more than isize::MAX
362-
// bytes, `elem_size * self.cap <= isize::MAX` as a precondition, so
363-
// this can't overflow.
364-
//
365-
// Similarly like with `double` above we can go straight to
366-
// `Layout::from_size_align_unchecked` as we know this won't
367-
// overflow and the alignment is sufficiently small.
368-
let new_cap = 2 * self.cap;
369-
let new_size = new_cap * elem_size;
370-
alloc_guard(new_size);
371-
let ptr = self.ptr() as *mut _;
372-
let new_layout = Layout::from_size_align_unchecked(new_size, old_layout.align());
373-
match self.a.grow_in_place(ptr, old_layout, new_layout) {
374-
Ok(_) => {
375-
// We can't directly divide `size`.
376-
self.cap = new_cap;
377-
true
378-
}
379-
Err(_) => {
380-
false
381-
}
437+
/// Attempts to grow the vector capacity by `capacity_increase`.
438+
///
439+
/// It allows implementing amortized O(1) `push` on vector-like containers.
440+
///
441+
/// # Attributes
442+
///
443+
/// - `#[inline(never)]`: this function is kept out of line (unlike `grow_by`,
444+
/// which is `#[inline]` so that LLVM can "see" where memory is allocated).
445+
/// - `#[cold]`: calling this function is a "rare" event
446+
///
447+
#[inline(never)]
448+
#[cold]
449+
pub fn grow_by_in_place(&mut self, capacity_increase: usize) -> bool {
450+
let elem_size = mem::size_of::<T>();
451+
assert!(elem_size != 0, "RawVecs of zero-sized types can't grow");
452+
let old_layout = match self.current_layout() {
453+
Some(layout) => layout,
454+
None => return false, // nothing to grow
455+
};
456+
457+
// The invariant `elem_size * self.cap <= isize::MAX` is
458+
// maintained by `alloc_guard`; the alignment will never be too
459+
// large as to "not be satisfiable" (so we can use
460+
// `from_size_align_unchecked`).
461+
let new_cap = Self::suitable_capacity(self.cap, capacity_increase);
462+
let new_size = new_cap * elem_size;
463+
let new_layout = unsafe {
464+
Layout::from_size_align_unchecked(new_size, old_layout.align())
465+
};
466+
let (_, usable_size) = self.a.usable_size(&new_layout);
467+
let new_layout = unsafe {
468+
Layout::from_size_align_unchecked(usable_size, old_layout.align())
469+
};
470+
alloc_guard(usable_size);
471+
let ptr = self.ptr() as *mut _;
472+
match unsafe { self.a.grow_in_place(ptr, old_layout, new_layout) } {
473+
Ok(_) => {
474+
// We can't directly divide `size`.
475+
self.cap = new_cap;
476+
true
477+
}
478+
Err(_) => {
479+
false
382480
}
383481
}
384482
}
@@ -794,4 +892,5 @@ mod tests {
794892
}
795893

796894

895+
797896
}

src/liballoc/vec.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ use core::ops;
7979
use core::ptr;
8080
use core::ptr::Shared;
8181
use core::slice;
82+
use core::intrinsics;
8283

8384
use borrow::ToOwned;
8485
use borrow::Cow;
@@ -707,6 +708,11 @@ impl<T> Vec<T> {
707708
self.pop().unwrap()
708709
}
709710

711+
#[inline(always)]
712+
fn is_full(&self) -> bool {
713+
self.len == self.buf.cap()
714+
}
715+
710716
/// Inserts an element at position `index` within the vector, shifting all
711717
/// elements after it to the right.
712718
///
@@ -729,8 +735,8 @@ impl<T> Vec<T> {
729735
assert!(index <= len);
730736

731737
// space for the new element
732-
if len == self.buf.cap() {
733-
self.buf.double();
738+
if unsafe { intrinsics::unlikely(self.is_full()) } {
739+
self.buf.grow_by(1);
734740
}
735741

736742
unsafe {
@@ -966,8 +972,8 @@ impl<T> Vec<T> {
966972
pub fn push(&mut self, value: T) {
967973
// This will panic or abort if we would allocate > isize::MAX bytes
968974
// or if the length increment would overflow for zero-sized types.
969-
if self.len == self.buf.cap() {
970-
self.buf.double();
975+
if unsafe { intrinsics::unlikely(self.is_full()) } {
976+
self.buf.grow_by(1);
971977
}
972978
unsafe {
973979
let end = self.as_mut_ptr().offset(self.len as isize);
@@ -2534,8 +2540,8 @@ impl<'a, T> Placer<T> for PlaceBack<'a, T> {
25342540
fn make_place(self) -> Self {
25352541
// This will panic or abort if we would allocate > isize::MAX bytes
25362542
// or if the length increment would overflow for zero-sized types.
2537-
if self.vec.len == self.vec.buf.cap() {
2538-
self.vec.buf.double();
2543+
if unsafe { intrinsics::unlikely(self.vec.is_full()) } {
2544+
self.vec.buf.grow_by(1);
25392545
}
25402546
self
25412547
}

src/liballoc/vec_deque.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use core::ops::{Index, IndexMut, Place, Placer, InPlace};
2525
use core::ptr;
2626
use core::ptr::Shared;
2727
use core::slice;
28+
use core::intrinsics;
2829

2930
use core::hash::{Hash, Hasher};
3031
use core::cmp;
@@ -1752,9 +1753,9 @@ impl<T> VecDeque<T> {
17521753
// This may panic or abort
17531754
#[inline]
17541755
fn grow_if_necessary(&mut self) {
1755-
if self.is_full() {
1756+
if unsafe { intrinsics::unlikely(self.is_full()) } {
17561757
let old_cap = self.cap();
1757-
self.buf.double();
1758+
self.buf.grow_by(old_cap);
17581759
unsafe {
17591760
self.handle_cap_increase(old_cap);
17601761
}

0 commit comments

Comments
 (0)