Commit cfad9d1

Auto merge of rust-lang#1935 - saethlin:optimize-sb, r=RalfJung
Optimizing Stacked Borrows (part 1?): Cache locations of Tags in a Borrow Stack

Before this PR, a profile of Miri under almost any workload pointed quite squarely at these regions of code as being incredibly hot (each accounting for ~40% of cycles):

https://github.com/rust-lang/miri/blob/dadcbebfbd017aac2358cf652a4bd71a91694edc/src/stacked_borrows.rs#L259-L269
https://github.com/rust-lang/miri/blob/dadcbebfbd017aac2358cf652a4bd71a91694edc/src/stacked_borrows.rs#L362-L369

This code is one of at least three reasons that Stacked Borrows analysis is super-linear: both loops are linear in the number of borrows in the stack, and they sit along the most commonly taken paths.

I'm addressing the first loop (which is in `Stack::find_granting`) by adding a very simple LRU-style cache, implemented on a `VecDeque`, that maps recently looked-up tags to their position in the stack. For `Untagged` accesses we fall back to the same sort of linear search, but as far as I can tell there are never enough `Untagged` items for that to be significant.

I'm addressing the second loop by keeping track of the region of the stack that could contain items granting `Permission::Unique`. This optimization is incredibly effective because `Read` accesses tend to dominate, and many trips through this code path now skip the loop entirely.

These optimizations result in pretty enormous improvements:
- Without raw pointer tagging: `mse` 34.5s -> 2.4s, `serde1` 5.6s -> 3.6s
- With raw pointer tagging: `mse` 35.3s -> 2.4s, `serde1` 5.7s -> 3.6s

And there is hardly any impact on memory usage:
- `mse` 844 MB -> 848 MB, `serde1` 184 MB -> 184 MB (jitter on these is a few MB)
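
The cache itself lives in the new `src/stacked_borrows/stack.rs` module (presumably the fifth changed file; its contents are not shown in this view). As a rough illustration of the two ideas described above, here is a minimal sketch using simplified stand-in types; the fields `cache` and `unique_range`, the constant `CACHE_LEN`, and all method bodies are illustrative guesses rather than the upstream code:

use std::collections::VecDeque;
use std::ops::Range;

// Stand-ins for Miri's `SbTag`, `Permission`, and `Item`, reduced to what
// this sketch needs.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct SbTag(u64);

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Permission {
    Unique,
    SharedReadWrite,
    SharedReadOnly,
    Disabled,
}

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Item {
    tag: SbTag,
    perm: Permission,
}

// Illustrative cache size; the real value is a tuning question.
const CACHE_LEN: usize = 32;

struct Stack {
    borrows: Vec<Item>,
    unknown_bottom: Option<SbTag>,
    // Recently looked-up tags and their positions in `borrows`, most recent
    // at the front: the simple VecDeque-based LRU cache described above.
    cache: VecDeque<(SbTag, usize)>,
    // Only indices in this range may hold `Permission::Unique` items, so a
    // `Read` access can often skip the disable-Uniques loop entirely.
    unique_range: Range<usize>,
}

impl Stack {
    // Cache-accelerated variant of the linear search in `find_granting`,
    // for the common case of a concrete (non-wildcard) tag.
    fn find_granting_concrete(
        &mut self,
        tag: SbTag,
        grants: impl Fn(Permission) -> bool,
    ) -> Option<usize> {
        // Fast path: a recently used tag is found without scanning the stack.
        // The real code must keep cached positions valid across mutations;
        // the tag is re-checked here defensively anyway.
        if let Some(pos) = self.cache.iter().position(|&(t, _)| t == tag) {
            let (t, idx) = self.cache[pos];
            if self.borrows[idx].tag == t && grants(self.borrows[idx].perm) {
                // Move the hit to the front so hot tags stay cached.
                let entry = self.cache.remove(pos).unwrap();
                self.cache.push_front(entry);
                return Some(idx);
            }
        }
        // Slow path: the same top-to-bottom linear scan as before.
        let idx = self.borrows.iter().rposition(|item| item.tag == tag && grants(item.perm))?;
        // Remember the hit, evicting the least recently used entry.
        self.cache.push_front((tag, idx));
        self.cache.truncate(CACHE_LEN);
        Some(idx)
    }
}

Wildcard and `Untagged` lookups would still take the slow path, matching the note above that those cases are too rare to matter.
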
2 parents 5382f46 + b004a03 commit cfad9d1

File tree

5 files changed: +421 -108 lines

Cargo.toml (+6)

@@ -50,3 +50,9 @@ rustc_private = true
 [[test]]
 name = "compiletest"
 harness = false
+
+[features]
+default = ["stack-cache"]
+# Will be enabled on CI via `--all-features`.
+expensive-debug-assertions = []
+stack-cache = []
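
The `stack-cache` feature presumably exists so the caching layer can be switched off for comparison, and the standard way to wire that up is `cfg` gating. A hypothetical sketch of the pattern, reusing the stand-in types from the sketch above (the method names here are made up for illustration):

impl Stack {
    fn find_granting_gated(&self, tag: SbTag) -> Option<usize> {
        // Consult the cache only when the feature is enabled; without it,
        // this compiles down to just the linear scan.
        #[cfg(feature = "stack-cache")]
        if let Some(&(t, idx)) = self.cache.iter().find(|&&(t, _)| t == tag) {
            debug_assert_eq!(self.borrows[idx].tag, t);
            return Some(idx);
        }
        self.borrows.iter().rposition(|item| item.tag == tag)
    }

    // Costly invariant checks, compiled in only when requested; the
    // Cargo.toml comment says CI enables this via `--all-features`.
    #[cfg(feature = "expensive-debug-assertions")]
    fn assert_cache_valid(&self) {
        for &(tag, idx) in &self.cache {
            assert_eq!(self.borrows[idx].tag, tag);
        }
    }
}

Since `default = ["stack-cache"]`, the cache is active unless a user opts out with `--no-default-features`.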

src/lib.rs (+2 -2)

@@ -90,8 +90,8 @@ pub use crate::mono_hash_map::MonoHashMap;
 pub use crate::operator::EvalContextExt as OperatorEvalContextExt;
 pub use crate::range_map::RangeMap;
 pub use crate::stacked_borrows::{
-    CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, SbTagExtra, Stack,
-    Stacks,
+    stack::Stack, CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag,
+    SbTagExtra, Stacks,
 };
 pub use crate::sync::{CondvarId, EvalContextExt as SyncEvalContextExt, MutexId, RwLockId};
 pub use crate::thread::{

src/stacked_borrows.rs (+30 -105)

@@ -23,6 +23,9 @@ use crate::*;
 pub mod diagnostics;
 use diagnostics::{AllocHistory, TagHistory};
 
+pub mod stack;
+use stack::Stack;
+
 pub type CallId = NonZeroU64;
 
 // Even reading memory can have effects on the stack, so we need a `RefCell` here.
@@ -111,23 +114,6 @@ impl fmt::Debug for Item {
     }
 }
 
-/// Extra per-location state.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct Stack {
-    /// Used *mostly* as a stack; never empty.
-    /// Invariants:
-    /// * Above a `SharedReadOnly` there can only be more `SharedReadOnly`.
-    /// * No tag occurs in the stack more than once.
-    borrows: Vec<Item>,
-    /// If this is `Some(id)`, then the actual current stack is unknown. This can happen when
-    /// wildcard pointers are used to access this location. What we do know is that `borrows` are at
-    /// the top of the stack, and below it are arbitrarily many items whose `tag` is strictly less
-    /// than `id`.
-    /// When the bottom is unknown, `borrows` always has a `SharedReadOnly` or `Unique` at the bottom;
-    /// we never have the unknown-to-known boundary in an SRW group.
-    unknown_bottom: Option<SbTag>,
-}
-
 /// Extra per-allocation state.
 #[derive(Clone, Debug)]
 pub struct Stacks {
@@ -298,65 +284,10 @@
 
 /// Core per-location operations: access, dealloc, reborrow.
 impl<'tcx> Stack {
-    /// Find the item granting the given kind of access to the given tag, and return where
-    /// it is on the stack. For wildcard tags, the given index is approximate, but if *no*
-    /// index is given it means the match was *not* in the known part of the stack.
-    /// `Ok(None)` indicates it matched the "unknown" part of the stack.
-    /// `Err` indicates it was not found.
-    fn find_granting(
-        &self,
-        access: AccessKind,
-        tag: SbTagExtra,
-        exposed_tags: &FxHashSet<SbTag>,
-    ) -> Result<Option<usize>, ()> {
-        let SbTagExtra::Concrete(tag) = tag else {
-            // Handle the wildcard case.
-            // Go search the stack for an exposed tag.
-            if let Some(idx) =
-                self.borrows
-                    .iter()
-                    .enumerate() // we also need to know *where* in the stack
-                    .rev() // search top-to-bottom
-                    .find_map(|(idx, item)| {
-                        // If the item fits and *might* be this wildcard, use it.
-                        if item.perm.grants(access) && exposed_tags.contains(&item.tag) {
-                            Some(idx)
-                        } else {
-                            None
-                        }
-                    })
-            {
-                return Ok(Some(idx));
-            }
-            // If we couldn't find it in the stack, check the unknown bottom.
-            return if self.unknown_bottom.is_some() { Ok(None) } else { Err(()) };
-        };
-
-        if let Some(idx) =
-            self.borrows
-                .iter()
-                .enumerate() // we also need to know *where* in the stack
-                .rev() // search top-to-bottom
-                // Return permission of first item that grants access.
-                // We require a permission with the right tag, ensuring U3 and F3.
-                .find_map(|(idx, item)| {
-                    if tag == item.tag && item.perm.grants(access) { Some(idx) } else { None }
-                })
-        {
-            return Ok(Some(idx));
-        }
-
-        // Couldn't find it in the stack; but if there is an unknown bottom it might be there.
-        let found = self.unknown_bottom.is_some_and(|&unknown_limit| {
-            tag.0 < unknown_limit.0 // unknown_limit is an upper bound for what can be in the unknown bottom.
-        });
-        if found { Ok(None) } else { Err(()) }
-    }
-
     /// Find the first write-incompatible item above the given one --
     /// i.e, find the height to which the stack will be truncated when writing to `granting`.
     fn find_first_write_incompatible(&self, granting: usize) -> usize {
-        let perm = self.borrows[granting].perm;
+        let perm = self.get(granting).unwrap().perm;
         match perm {
             Permission::SharedReadOnly => bug!("Cannot use SharedReadOnly for writing"),
             Permission::Disabled => bug!("Cannot use Disabled for anything"),
@@ -367,7 +298,7 @@ impl<'tcx> Stack {
             Permission::SharedReadWrite => {
                 // The SharedReadWrite *just* above us are compatible, to skip those.
                 let mut idx = granting + 1;
-                while let Some(item) = self.borrows.get(idx) {
+                while let Some(item) = self.get(idx) {
                     if item.perm == Permission::SharedReadWrite {
                         // Go on.
                         idx += 1;
@@ -462,16 +393,16 @@
                 // There is a SRW group boundary between the unknown and the known, so everything is incompatible.
                 0
             };
-            for item in self.borrows.drain(first_incompatible_idx..).rev() {
-                trace!("access: popping item {:?}", item);
+            self.pop_items_after(first_incompatible_idx, |item| {
                 Stack::item_popped(
                     &item,
                     Some((tag, alloc_range, offset, access)),
                     global,
                     alloc_history,
                 )?;
                 alloc_history.log_invalidation(item.tag, alloc_range, current_span);
-            }
+                Ok(())
+            })?;
         } else {
             // On a read, *disable* all `Unique` above the granting item. This ensures U2 for read accesses.
             // The reason this is not following the stack discipline (by removing the first Unique and
@@ -488,44 +419,39 @@
                 // We are reading from something in the unknown part. That means *all* `Unique` we know about are dead now.
                 0
             };
-            for idx in (first_incompatible_idx..self.borrows.len()).rev() {
-                let item = &mut self.borrows[idx];
-
-                if item.perm == Permission::Unique {
-                    trace!("access: disabling item {:?}", item);
-                    Stack::item_popped(
-                        item,
-                        Some((tag, alloc_range, offset, access)),
-                        global,
-                        alloc_history,
-                    )?;
-                    item.perm = Permission::Disabled;
-                    alloc_history.log_invalidation(item.tag, alloc_range, current_span);
-                }
-            }
+            self.disable_uniques_starting_at(first_incompatible_idx, |item| {
+                Stack::item_popped(
+                    &item,
+                    Some((tag, alloc_range, offset, access)),
+                    global,
+                    alloc_history,
+                )?;
+                alloc_history.log_invalidation(item.tag, alloc_range, current_span);
+                Ok(())
+            })?;
         }
 
         // If this was an approximate action, we now collapse everything into an unknown.
         if granting_idx.is_none() || matches!(tag, SbTagExtra::Wildcard) {
            // Compute the upper bound of the items that remain.
            // (This is why we did all the work above: to reduce the items we have to consider here.)
            let mut max = NonZeroU64::new(1).unwrap();
-            for item in &self.borrows {
+            for i in 0..self.len() {
+                let item = self.get(i).unwrap();
                // Skip disabled items, they cannot be matched anyway.
                if !matches!(item.perm, Permission::Disabled) {
                    // We are looking for a strict upper bound, so add 1 to this tag.
                    max = cmp::max(item.tag.0.checked_add(1).unwrap(), max);
                }
            }
-            if let Some(unk) = self.unknown_bottom {
+            if let Some(unk) = self.unknown_bottom() {
                max = cmp::max(unk.0, max);
            }
            // Use `max` as new strict upper bound for everything.
            trace!(
                "access: forgetting stack to upper bound {max} due to wildcard or unknown access"
            );
-            self.borrows.clear();
-            self.unknown_bottom = Some(SbTag(max));
+            self.set_unknown_bottom(SbTag(max));
        }
 
        // Done.
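
The bodies of `pop_items_after` and `disable_uniques_starting_at` also live in the unshown `stack` module. What the call sites above require is: run the caller's closure on each affected item (for protector checks and invalidation logging), mutate the stack, and repair the derived state. A hedged sketch, continuing with the illustrative fields from the first sketch (the exact fix-up rules upstream may differ):

impl Stack {
    // Pop every item at or above `start`, calling `visitor` on each so the
    // caller can do protector checks and invalidation logging, then repair
    // the derived state.
    fn pop_items_after<E>(
        &mut self,
        start: usize,
        mut visitor: impl FnMut(Item) -> Result<(), E>,
    ) -> Result<(), E> {
        for item in self.borrows.drain(start..).rev() {
            visitor(item)?;
        }
        // Cached positions at or above `start` now dangle; evict them.
        self.cache.retain(|&(_, idx)| idx < start);
        // The `Unique` range cannot extend past the new top of the stack.
        self.unique_range.end = self.unique_range.end.min(start);
        self.unique_range.start = self.unique_range.start.min(self.unique_range.end);
        Ok(())
    }

    // Disable every `Unique` item at or above `start`. Because `unique_range`
    // bounds where `Unique` can occur, a `Read` access whose `start` lies
    // above the range skips this loop entirely, which is the common case.
    fn disable_uniques_starting_at<E>(
        &mut self,
        start: usize,
        mut visitor: impl FnMut(Item) -> Result<(), E>,
    ) -> Result<(), E> {
        for idx in start.max(self.unique_range.start)..self.unique_range.end {
            if self.borrows[idx].perm == Permission::Unique {
                visitor(self.borrows[idx])?; // `Item` is `Copy` in this sketch
                self.borrows[idx].perm = Permission::Disabled;
            }
        }
        Ok(())
    }
}

Keeping the fix-up logic next to the mutation is what lets the rest of the diff treat the cache as invisible: callers see the same pop/disable semantics as the old loops.
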
@@ -553,8 +479,9 @@
             )
         })?;
 
-        // Step 2: Remove all items. Also checks for protectors.
-        for item in self.borrows.drain(..).rev() {
+        // Step 2: Consider all items removed. This checks for protectors.
+        for idx in (0..self.len()).rev() {
+            let item = self.get(idx).unwrap();
             Stack::item_popped(&item, None, global, alloc_history)?;
         }
         Ok(())
@@ -602,8 +529,7 @@
             // The new thing is SRW anyway, so we cannot push it "on top of the unkown part"
             // (for all we know, it might join an SRW group inside the unknown).
             trace!("reborrow: forgetting stack entirely due to SharedReadWrite reborrow from wildcard or unknown");
-            self.borrows.clear();
-            self.unknown_bottom = Some(global.next_ptr_tag);
+            self.set_unknown_bottom(global.next_ptr_tag);
             return Ok(());
         };
@@ -630,19 +556,18 @@
             // on top of `derived_from`, and we want the new item at the top so that we
             // get the strongest possible guarantees.
             // This ensures U1 and F1.
-            self.borrows.len()
+            self.len()
         };
 
         // Put the new item there. As an optimization, deduplicate if it is equal to one of its new neighbors.
         // `new_idx` might be 0 if we just cleared the entire stack.
-        if self.borrows.get(new_idx) == Some(&new)
-            || (new_idx > 0 && self.borrows[new_idx - 1] == new)
+        if self.get(new_idx) == Some(new) || (new_idx > 0 && self.get(new_idx - 1).unwrap() == new)
         {
             // Optimization applies, done.
             trace!("reborrow: avoiding adding redundant item {:?}", new);
         } else {
             trace!("reborrow: adding item {:?}", new);
-            self.borrows.insert(new_idx, new);
+            self.insert(new_idx, new);
         }
         Ok(())
     }
@@ -654,7 +579,7 @@ impl<'tcx> Stacks {
     /// Creates new stack with initial tag.
     fn new(size: Size, perm: Permission, tag: SbTag) -> Self {
         let item = Item { perm, tag, protector: None };
-        let stack = Stack { borrows: vec![item], unknown_bottom: None };
+        let stack = Stack::new(item);
 
         Stacks {
             stacks: RangeMap::new(size, stack),
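
Throughout the hunks above, direct uses of the now-private `borrows` vector are replaced with calls to `new`, `len`, `get`, `insert`, `unknown_bottom`, and `set_unknown_bottom`, so every mutation can flow through one place that keeps the cache coherent. Note that the dedup check now compares `self.get(new_idx) == Some(new)` rather than `Some(&new)`, which suggests `get` returns items by value. Judging only from these call sites, here is a sketch of plausible shapes for these accessors; the bodies and fix-up rules are guesses, continuing the illustrative fields from the first sketch:

impl Stack {
    // A stack starts with exactly one item and a fully known bottom.
    fn new(item: Item) -> Self {
        Stack {
            borrows: vec![item],
            unknown_bottom: None,
            cache: VecDeque::new(),
            unique_range: if item.perm == Permission::Unique { 0..1 } else { 0..0 },
        }
    }

    fn len(&self) -> usize {
        self.borrows.len()
    }

    // Returns a copy so callers cannot mutate items behind the cache's back.
    fn get(&self, idx: usize) -> Option<Item> {
        self.borrows.get(idx).copied()
    }

    fn unknown_bottom(&self) -> Option<SbTag> {
        self.unknown_bottom
    }

    // Collapsing to an unknown bottom drops every known item, so all derived
    // state is invalidated along with `borrows`.
    fn set_unknown_bottom(&mut self, tag: SbTag) {
        self.borrows.clear();
        self.cache.clear();
        self.unique_range = 0..0;
        self.unknown_bottom = Some(tag);
    }

    // Inserting shifts every position at or above `idx`, so cached positions
    // and the `Unique` range have to be adjusted (conservatively, here).
    fn insert(&mut self, idx: usize, item: Item) {
        self.borrows.insert(idx, item);
        self.cache.retain(|&(_, i)| i < idx);
        if idx <= self.unique_range.start {
            self.unique_range.start += 1;
        }
        if idx <= self.unique_range.end {
            self.unique_range.end += 1;
        }
        if item.perm == Permission::Unique {
            self.unique_range.start = self.unique_range.start.min(idx);
            self.unique_range.end = self.unique_range.end.max(idx + 1);
        }
    }
}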

src/stacked_borrows/diagnostics.rs (+4 -1)

@@ -185,7 +185,10 @@
 
 fn error_cause(stack: &Stack, tag: SbTagExtra) -> &'static str {
     if let SbTagExtra::Concrete(tag) = tag {
-        if stack.borrows.iter().any(|item| item.tag == tag && item.perm != Permission::Disabled) {
+        if (0..stack.len())
+            .map(|i| stack.get(i).unwrap())
+            .any(|item| item.tag == tag && item.perm != Permission::Disabled)
+        {
             ", but that tag only grants SharedReadOnly permission for this location"
         } else {
             ", but that tag does not exist in the borrow stack for this location"
