From d2214d65d187504c15b1f53b32240809362fc4b6 Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Fri, 11 Oct 2024 09:43:14 +0100 Subject: [PATCH 1/4] Add ThinVec::extract_if --- src/lib.rs | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 5db06a2..0a225d6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1541,6 +1541,75 @@ impl ThinVec { } } + /// Creates an iterator which uses a closure to determine if an element should be removed. + /// + /// If the closure returns true, then the element is removed and yielded. + /// If the closure returns false, the element will remain in the vector and will not be yielded + /// by the iterator. + /// + /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating + /// or the iteration short-circuits, then the remaining elements will be retained. + /// Use [`ThinVec::retain`] with a negated predicate if you do not need the returned iterator. + /// + /// Using this method is equivalent to the following code: + /// + /// ``` + /// # use thin_vec::{ThinVec, thin_vec}; + /// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 }; + /// # let mut vec = thin_vec![1, 2, 3, 4, 5, 6]; + /// let mut i = 0; + /// while i < vec.len() { + /// if some_predicate(&mut vec[i]) { + /// let val = vec.remove(i); + /// // your code here + /// } else { + /// i += 1; + /// } + /// } + /// + /// # assert_eq!(vec, thin_vec![1, 4, 5]); + /// ``` + /// + /// But `extract_if` is easier to use. `extract_if` is also more efficient, + /// because it can backshift the elements of the array in bulk. + /// + /// Note that `extract_if` also lets you mutate every element in the filter closure, + /// regardless of whether you choose to keep or remove it. + /// + /// # Examples + /// + /// Splitting an array into evens and odds, reusing the original allocation: + /// + /// ``` + /// use thin_vec::{ThinVec, thin_vec}; + /// + /// let mut numbers = thin_vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]; + /// + /// let evens = numbers.extract_if(|x| *x % 2 == 0).collect::>(); + /// let odds = numbers; + /// + /// assert_eq!(evens, thin_vec![2, 4, 6, 8, 14]); + /// assert_eq!(odds, thin_vec![1, 3, 5, 9, 11, 13, 15]); + /// ``` + pub fn extract_if(&mut self, filter: F) -> ExtractIf<'_, T, F> + where + F: FnMut(&mut T) -> bool, + { + let old_len = self.len(); + // Guard against us getting leaked (leak amplification) + unsafe { + self.set_len(0); + } + + ExtractIf { + vec: self, + idx: 0, + del: 0, + old_len, + pred: filter, + } + } + /// Resize the buffer and update its capacity, without changing the length. /// Unsafe because it can cause length to be greater than capacity. unsafe fn reallocate(&mut self, new_cap: usize) { @@ -2776,6 +2845,77 @@ impl Drain<'_, T> { } } +/// An iterator for [`ThinVec`] which uses a closure to determine if an element should be removed. +#[must_use = "iterators are lazy and do nothing unless consumed"] +pub struct ExtractIf<'a, T, F> { + vec: &'a mut ThinVec, + /// The index of the item that will be inspected by the next call to `next`. + idx: usize, + /// The number of items that have been drained (removed) thus far. + del: usize, + /// The original length of `vec` prior to draining. + old_len: usize, + /// The filter test predicate. + pred: F, +} + +impl Iterator for ExtractIf<'_, T, F> +where + F: FnMut(&mut T) -> bool, +{ + type Item = T; + + fn next(&mut self) -> Option { + unsafe { + while self.idx < self.old_len { + let i = self.idx; + let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len); + let drained = (self.pred)(&mut v[i]); + // Update the index *after* the predicate is called. If the index + // is updated prior and the predicate panics, the element at this + // index would be leaked. + self.idx += 1; + if drained { + self.del += 1; + return Some(ptr::read(&v[i])); + } else if self.del > 0 { + let del = self.del; + let src: *const T = &v[i]; + let dst: *mut T = &mut v[i - del]; + ptr::copy_nonoverlapping(src, dst, 1); + } + } + None + } + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.old_len - self.idx)) + } +} + +impl Drop for ExtractIf<'_, A, F> { + fn drop(&mut self) { + unsafe { + if self.idx < self.old_len && self.del > 0 { + // This is a pretty messed up state, and there isn't really an + // obviously right thing to do. We don't want to keep trying + // to execute `pred`, so we just backshift all the unprocessed + // elements and tell the vec that they still exist. The backshift + // is required to prevent a double-drop of the last successfully + // drained item prior to a panic in the predicate. + let ptr = self.vec.as_mut_ptr(); + let src = ptr.add(self.idx); + let dst = src.sub(self.del); + let tail_len = self.old_len - self.idx; + src.copy_to(dst, tail_len); + } + + self.vec.set_len(self.old_len - self.del); + } + } +} + /// Write is implemented for `ThinVec` by appending to the vector. /// The vector will grow as needed. /// This implementation is identical to the one for `Vec`. From 7f012915ba593658c8e0b6f665f622e1d28fc075 Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Fri, 11 Oct 2024 09:44:17 +0100 Subject: [PATCH 2/4] Update CI --- .github/workflows/rust.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 206d798..a55ad7b 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -40,14 +40,14 @@ jobs: run: cargo build --features=malloc_size_of --verbose - name: Run tests run: cargo test --verbose - - name: Run tests - run: cargo test --verbose - name: Run tests (serde) run: cargo test --features=serde --verbose - name: Run tests (gecko-ffi) run: cargo test --tests --features=gecko-ffi --verbose - name: Run tests (no_std) run: cargo test --tests --no-default-features --verbose + - name: Run tests (unstable) + run: cargo +nightly test --features=unstable --verbose msrv: runs-on: ubuntu-latest From 9a039efd541469abd9fd29f09a2b284021c2e009 Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Mon, 25 Aug 2025 11:57:33 +0100 Subject: [PATCH 3/4] Implement range argument to match Vec --- src/lib.rs | 85 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0a225d6..e3b9001 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -160,7 +160,7 @@ use core::ops::Bound; use core::ops::{Deref, DerefMut, RangeBounds}; use core::ptr::NonNull; use core::slice::Iter; -use core::{fmt, mem, ptr, slice}; +use core::{fmt, mem, ops, ptr, slice}; use impl_details::*; @@ -1585,26 +1585,91 @@ impl ThinVec { /// /// let mut numbers = thin_vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]; /// - /// let evens = numbers.extract_if(|x| *x % 2 == 0).collect::>(); + /// let evens = numbers.extract_if(.., |x| *x % 2 == 0).collect::>(); /// let odds = numbers; /// /// assert_eq!(evens, thin_vec![2, 4, 6, 8, 14]); /// assert_eq!(odds, thin_vec![1, 3, 5, 9, 11, 13, 15]); /// ``` - pub fn extract_if(&mut self, filter: F) -> ExtractIf<'_, T, F> + pub fn extract_if>( + &mut self, + range: R, + filter: F, + ) -> ExtractIf<'_, T, F> where F: FnMut(&mut T) -> bool, { + // Copy of https://github.com/rust-lang/rust/blob/ee361e8fca1c30e13e7a31cc82b64c045339d3a8/library/core/src/slice/index.rs#L37 + fn slice_index_fail(start: usize, end: usize, len: usize) -> ! { + if start > len { + panic!( + "range start index {} out of range for slice of length {}", + start, len + ) + } + + if end > len { + panic!( + "range end index {} out of range for slice of length {}", + end, len + ) + } + + if start > end { + panic!("slice index starts at {} but ends at {}", start, end) + } + + // Only reachable if the range was a `RangeInclusive` or a + // `RangeToInclusive`, with `end == len`. + panic!( + "range end index {} out of range for slice of length {}", + end, len + ) + } + + // Backport of https://github.com/rust-lang/rust/blob/ee361e8fca1c30e13e7a31cc82b64c045339d3a8/library/core/src/slice/index.rs#L855 + pub fn slice_range(range: R, bounds: ops::RangeTo) -> ops::Range + where + R: ops::RangeBounds, + { + let len = bounds.end; + + let end = match range.end_bound() { + ops::Bound::Included(&end) if end >= len => slice_index_fail(0, end, len), + // Cannot overflow because `end < len` implies `end < usize::MAX`. + ops::Bound::Included(&end) => end + 1, + + ops::Bound::Excluded(&end) if end > len => slice_index_fail(0, end, len), + ops::Bound::Excluded(&end) => end, + ops::Bound::Unbounded => len, + }; + + let start = match range.start_bound() { + ops::Bound::Excluded(&start) if start >= end => slice_index_fail(start, end, len), + // Cannot overflow because `start < end` implies `start < usize::MAX`. + ops::Bound::Excluded(&start) => start + 1, + + ops::Bound::Included(&start) if start > end => slice_index_fail(start, end, len), + ops::Bound::Included(&start) => start, + + ops::Bound::Unbounded => 0, + }; + + ops::Range { start, end } + } + let old_len = self.len(); - // Guard against us getting leaked (leak amplification) + let ops::Range { start, end } = slice_range(range, ..old_len); + + // Guard against the vec getting leaked (leak amplification) unsafe { self.set_len(0); } - ExtractIf { vec: self, - idx: 0, + idx: start, del: 0, + end, old_len, pred: filter, } @@ -2851,6 +2916,8 @@ pub struct ExtractIf<'a, T, F> { vec: &'a mut ThinVec, /// The index of the item that will be inspected by the next call to `next`. idx: usize, + /// Elements at and beyond this point will be retained. Must be equal or smaller than `old_len`. + end: usize, /// The number of items that have been drained (removed) thus far. del: usize, /// The original length of `vec` prior to draining. @@ -2865,9 +2932,9 @@ where { type Item = T; - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { unsafe { - while self.idx < self.old_len { + while self.idx < self.end { let i = self.idx; let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len); let drained = (self.pred)(&mut v[i]); @@ -2890,7 +2957,7 @@ where } fn size_hint(&self) -> (usize, Option) { - (0, Some(self.old_len - self.idx)) + (0, Some(self.end - self.idx)) } } From 971a22eb7276ead3ca90e9f0102d759599f1929d Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Mon, 25 Aug 2025 12:05:48 +0100 Subject: [PATCH 4/4] Disable stacked borrows Miri in CI --- .github/workflows/rust.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a55ad7b..2c3fbd4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -20,15 +20,13 @@ jobs: rustup toolchain install nightly --component miri rustup override set nightly cargo miri setup - - name: Test (default) with Miri - run: MIRIFLAGS=-Zmiri-strict-provenance cargo miri test + # We do not use Stacked Borrows anymore, since ExtractIf (which is lifted from std) does not pass SB, + # and if std can do it in non-magical data structure code, we can do it too. - name: Test (default) with Miri + Tree Borrows run: MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-tree-borrows" cargo miri test # AutoThinVec needs tree borrows. - name: Test (gecko-ffi) with Miri run: MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-tree-borrows" cargo miri test --features=gecko-ffi - - name: Test (unstable features) with Miri - run: MIRIFLAGS=-Zmiri-strict-provenance cargo miri test --features=unstable build: runs-on: ubuntu-latest