
Commit f6cb952

Committed Dec 9, 2024
Auto merge of rust-lang#133891 - nnethercote:MixedBitSet, r=Mark-Simulacrum
Introduce `MixedBitSet`

`ChunkedBitSet` is good at avoiding excessive memory usage for programs with very large functions where dataflow bitsets have very large domain sizes. But it's overly heavyweight for small bitsets, because any non-empty `ChunkedBitSet` takes up at least 256 bytes.

This PR introduces `MixedBitSet`, a simple bitset that uses `BitSet` for small/medium bitsets and `ChunkedBitSet` for large bitsets. It's a speed and memory usage win.

r? `@Mark-Simulacrum`
2 parents 1b3fb31 + fa6ceba commit f6cb952

9 files changed: +347, -169 lines


compiler/rustc_borrowck/src/lib.rs

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ use rustc_data_structures::graph::dominators::Dominators;
 use rustc_errors::Diag;
 use rustc_hir as hir;
 use rustc_hir::def_id::LocalDefId;
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, MixedBitSet};
 use rustc_index::{IndexSlice, IndexVec};
 use rustc_infer::infer::{
     InferCtxt, NllRegionVariableOrigin, RegionVariableOrigin, TyCtxtInferExt,
@@ -1797,7 +1797,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> {
         location: Location,
         desired_action: InitializationRequiringAction,
         place_span: (PlaceRef<'tcx>, Span),
-        maybe_uninits: &ChunkedBitSet<MovePathIndex>,
+        maybe_uninits: &MixedBitSet<MovePathIndex>,
         from: u64,
         to: u64,
     ) {

compiler/rustc_index/src/bit_set.rs

Lines changed: 270 additions & 115 deletions
Large diffs are not rendered by default.
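The `bit_set.rs` diff above, where `MixedBitSet` itself is defined, is the one piece not shown here. As a rough orientation, the following is a minimal, self-contained sketch of the idea from the commit message: an enum that wraps a dense word-based set for small/medium domains and would wrap a chunked set for large ones. Everything in the sketch (`WordBitSet`, `CHUNK_BITS`, the 2048-bit cut-over) is an illustrative assumption, not rustc's actual code; the real `Small` and `Large` variants hold `BitSet` and `ChunkedBitSet` respectively, as the other diffs in this commit show.

```rust
// Illustrative sketch only -- not the rustc implementation. `WordBitSet`,
// `CHUNK_BITS`, and the cut-over value are made up for this example.

const WORD_BITS: usize = 64;

/// Stand-in for the dense `BitSet`: one bit per element, packed into `u64` words.
struct WordBitSet {
    domain_size: usize,
    words: Vec<u64>,
}

impl WordBitSet {
    fn new_empty(domain_size: usize) -> Self {
        WordBitSet { domain_size, words: vec![0; domain_size.div_ceil(WORD_BITS)] }
    }

    /// Returns `true` if the bit was newly set.
    fn insert(&mut self, i: usize) -> bool {
        assert!(i < self.domain_size);
        let (word, mask) = (i / WORD_BITS, 1u64 << (i % WORD_BITS));
        let newly_set = (self.words[word] & mask) == 0;
        self.words[word] |= mask;
        newly_set
    }

    fn contains(&self, i: usize) -> bool {
        assert!(i < self.domain_size);
        (self.words[i / WORD_BITS] & (1u64 << (i % WORD_BITS))) != 0
    }

    /// Returns `true` if `self` changed.
    fn union(&mut self, other: &Self) -> bool {
        assert_eq!(self.domain_size, other.domain_size);
        let mut changed = false;
        for (a, b) in self.words.iter_mut().zip(&other.words) {
            let old = *a;
            *a |= *b;
            changed |= *a != old;
        }
        changed
    }
}

/// Hypothetical cut-over point between the two representations.
const CHUNK_BITS: usize = 2048;

/// Small/medium domains use the dense set; large domains would use a chunked
/// set (`ChunkedBitSet` in rustc, approximated by `WordBitSet` here).
enum MixedBitSet {
    Small(WordBitSet),
    Large(WordBitSet),
}

impl MixedBitSet {
    fn new_empty(domain_size: usize) -> Self {
        if domain_size <= CHUNK_BITS {
            MixedBitSet::Small(WordBitSet::new_empty(domain_size))
        } else {
            MixedBitSet::Large(WordBitSet::new_empty(domain_size))
        }
    }

    fn insert(&mut self, i: usize) -> bool {
        match self {
            MixedBitSet::Small(s) | MixedBitSet::Large(s) => s.insert(i),
        }
    }

    fn contains(&self, i: usize) -> bool {
        match self {
            MixedBitSet::Small(s) | MixedBitSet::Large(s) => s.contains(i),
        }
    }

    /// Both sides must use the same representation, mirroring the panic in the
    /// `fmt_diff_with` impl added by this commit.
    fn union(&mut self, other: &Self) -> bool {
        match (self, other) {
            (MixedBitSet::Small(a), MixedBitSet::Small(b))
            | (MixedBitSet::Large(a), MixedBitSet::Large(b)) => a.union(b),
            _ => panic!("MixedBitSet size mismatch"),
        }
    }
}

fn main() {
    let mut a = MixedBitSet::new_empty(100); // 100 <= CHUNK_BITS, so `Small` is chosen
    a.insert(42);
    let mut b = MixedBitSet::new_empty(100);
    b.insert(7);
    assert!(a.union(&b)); // `true`: bit 7 was new to `a`
    assert!(a.contains(7) && a.contains(42));
}
```

The memory win comes from the real `Large` variant, which stores a `ChunkedBitSet` that only materializes chunks containing set bits, while small domains avoid the at-least-256-byte cost of any non-empty `ChunkedBitSet` mentioned in the commit message.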

compiler/rustc_index/src/bit_set/tests.rs

Lines changed: 3 additions & 3 deletions
@@ -503,15 +503,15 @@ fn sparse_matrix_operations() {
     matrix.insert(2, 99);
     matrix.insert(4, 0);
 
-    let mut disjoint: ChunkedBitSet<usize> = ChunkedBitSet::new_empty(100);
+    let mut disjoint: BitSet<usize> = BitSet::new_empty(100);
     disjoint.insert(33);
 
-    let mut superset = ChunkedBitSet::new_empty(100);
+    let mut superset = BitSet::new_empty(100);
     superset.insert(22);
     superset.insert(75);
     superset.insert(33);
 
-    let mut subset = ChunkedBitSet::new_empty(100);
+    let mut subset = BitSet::new_empty(100);
     subset.insert(22);
 
     // SparseBitMatrix::remove

compiler/rustc_mir_dataflow/src/framework/fmt.rs

Lines changed: 27 additions & 7 deletions
@@ -4,7 +4,7 @@
 use std::fmt;
 
 use rustc_index::Idx;
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
 
 use super::lattice::MaybeReachable;
 
@@ -85,8 +85,8 @@ where
         let size = self.domain_size();
         assert_eq!(size, old.domain_size());
 
-        let mut set_in_self = ChunkedBitSet::new_empty(size);
-        let mut cleared_in_self = ChunkedBitSet::new_empty(size);
+        let mut set_in_self = MixedBitSet::new_empty(size);
+        let mut cleared_in_self = MixedBitSet::new_empty(size);
 
         for i in (0..size).map(T::new) {
             match (self.contains(i), old.contains(i)) {
@@ -112,8 +112,8 @@ where
         let size = self.domain_size();
         assert_eq!(size, old.domain_size());
 
-        let mut set_in_self = ChunkedBitSet::new_empty(size);
-        let mut cleared_in_self = ChunkedBitSet::new_empty(size);
+        let mut set_in_self = MixedBitSet::new_empty(size);
+        let mut cleared_in_self = MixedBitSet::new_empty(size);
 
         for i in (0..size).map(T::new) {
             match (self.contains(i), old.contains(i)) {
@@ -127,6 +127,26 @@ where
         }
     }
 
+impl<T, C> DebugWithContext<C> for MixedBitSet<T>
+where
+    T: Idx + DebugWithContext<C>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
+            MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
+        }
+    }
+
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match (self, old) {
+            (MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
+            (MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
+            _ => panic!("MixedBitSet size mismatch"),
+        }
+    }
+}
+
 impl<S, C> DebugWithContext<C> for MaybeReachable<S>
 where
     S: DebugWithContext<C>,
@@ -159,8 +179,8 @@ where
 }
 
 fn fmt_diff<T, C>(
-    inserted: &ChunkedBitSet<T>,
-    removed: &ChunkedBitSet<T>,
+    inserted: &MixedBitSet<T>,
+    removed: &MixedBitSet<T>,
     ctxt: &C,
     f: &mut fmt::Formatter<'_>,
 ) -> fmt::Result

compiler/rustc_mir_dataflow/src/framework/lattice.rs

Lines changed: 2 additions & 2 deletions
@@ -40,7 +40,7 @@
 
 use std::iter;
 
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, MixedBitSet};
 use rustc_index::{Idx, IndexVec};
 
 use crate::framework::BitSetExt;
@@ -126,7 +126,7 @@ impl<T: Idx> JoinSemiLattice for BitSet<T> {
     }
 }
 
-impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
+impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
     fn join(&mut self, other: &Self) -> bool {
         self.union(other)
     }
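For context on why `join` returns a `bool` in the impl above: the dataflow framework re-processes blocks only while some block's state still changes, and `union` reporting "did `self` grow?" is what drives that loop to a fixpoint. A rough, self-contained sketch of the pattern (the `fixpoint` driver and the `BTreeSet` domain below are hypothetical illustrations, not rustc's engine):

```rust
use std::collections::BTreeSet;

/// Same shape as the trait above: the least-upper-bound operation reports
/// whether `self` actually changed.
trait JoinSemiLattice {
    fn join(&mut self, other: &Self) -> bool;
}

/// Toy domain: a set of facts per block, joined by union (as `MixedBitSet` is).
impl JoinSemiLattice for BTreeSet<u32> {
    fn join(&mut self, other: &Self) -> bool {
        let before = self.len();
        self.extend(other.iter().copied());
        self.len() != before
    }
}

/// Hypothetical driver: keep propagating facts along edges until no block changes.
fn fixpoint(edges: &[(usize, usize)], states: &mut [BTreeSet<u32>]) {
    let mut changed = true;
    while changed {
        changed = false;
        for &(from, to) in edges {
            let facts = states[from].clone();
            // The `bool` returned by `join` is what terminates the loop.
            changed |= states[to].join(&facts);
        }
    }
}

fn main() {
    // Three "blocks" in a row, 0 -> 1 -> 2, with one fact introduced in block 0.
    let mut states = vec![BTreeSet::from([1u32]), BTreeSet::new(), BTreeSet::new()];
    fixpoint(&[(0, 1), (1, 2)], &mut states);
    assert!(states[2].contains(&1)); // the fact reached the last block
}
```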

compiler/rustc_mir_dataflow/src/framework/mod.rs

Lines changed: 3 additions & 3 deletions
@@ -35,7 +35,7 @@
 use std::cmp::Ordering;
 
 use rustc_data_structures::work_queue::WorkQueue;
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, MixedBitSet};
 use rustc_index::{Idx, IndexVec};
 use rustc_middle::bug;
 use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
@@ -71,7 +71,7 @@ impl<T: Idx> BitSetExt<T> for BitSet<T> {
     }
 }
 
-impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
+impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
     fn contains(&self, elem: T) -> bool {
         self.contains(elem)
     }
@@ -327,7 +327,7 @@ impl<T: Idx> GenKill<T> for BitSet<T> {
     }
 }
 
-impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
+impl<T: Idx> GenKill<T> for MixedBitSet<T> {
     fn gen_(&mut self, elem: T) {
         self.insert(elem);
     }

compiler/rustc_mir_dataflow/src/impls/initialized.rs

Lines changed: 11 additions & 11 deletions
@@ -1,7 +1,7 @@
 use std::assert_matches::assert_matches;
 
 use rustc_index::Idx;
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, MixedBitSet};
 use rustc_middle::bug;
 use rustc_middle::mir::{self, Body, CallReturnPlaces, Location, TerminatorEdges};
 use rustc_middle::ty::{self, TyCtxt};
@@ -70,7 +70,7 @@ impl<'a, 'tcx> MaybeInitializedPlaces<'a, 'tcx> {
     pub fn is_unwind_dead(
         &self,
         place: mir::Place<'tcx>,
-        state: &MaybeReachable<ChunkedBitSet<MovePathIndex>>,
+        state: &MaybeReachable<MixedBitSet<MovePathIndex>>,
     ) -> bool {
         if let LookupResult::Exact(path) = self.move_data().rev_lookup.find(place.as_ref()) {
             let mut maybe_live = false;
@@ -244,8 +244,8 @@ impl<'tcx> MaybeUninitializedPlaces<'_, 'tcx> {
 
 impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
     /// There can be many more `MovePathIndex` than there are locals in a MIR body.
-    /// We use a chunked bitset to avoid paying too high a memory footprint.
-    type Domain = MaybeReachable<ChunkedBitSet<MovePathIndex>>;
+    /// We use a mixed bitset to avoid paying too high a memory footprint.
+    type Domain = MaybeReachable<MixedBitSet<MovePathIndex>>;
 
     const NAME: &'static str = "maybe_init";
 
@@ -256,7 +256,7 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
 
     fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) {
         *state =
-            MaybeReachable::Reachable(ChunkedBitSet::new_empty(self.move_data().move_paths.len()));
+            MaybeReachable::Reachable(MixedBitSet::new_empty(self.move_data().move_paths.len()));
         drop_flag_effects_for_function_entry(self.body, self.move_data, |path, s| {
             assert!(s == DropFlagState::Present);
             state.gen_(path);
@@ -371,14 +371,14 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
 
 impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> {
     /// There can be many more `MovePathIndex` than there are locals in a MIR body.
-    /// We use a chunked bitset to avoid paying too high a memory footprint.
-    type Domain = ChunkedBitSet<MovePathIndex>;
+    /// We use a mixed bitset to avoid paying too high a memory footprint.
+    type Domain = MixedBitSet<MovePathIndex>;
 
     const NAME: &'static str = "maybe_uninit";
 
     fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain {
         // bottom = initialized (start_block_effect counters this at outset)
-        ChunkedBitSet::new_empty(self.move_data().move_paths.len())
+        MixedBitSet::new_empty(self.move_data().move_paths.len())
     }
 
     // sets on_entry bits for Arg places
@@ -492,14 +492,14 @@ impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> {
 
 impl<'tcx> Analysis<'tcx> for EverInitializedPlaces<'_, 'tcx> {
     /// There can be many more `InitIndex` than there are locals in a MIR body.
-    /// We use a chunked bitset to avoid paying too high a memory footprint.
-    type Domain = ChunkedBitSet<InitIndex>;
+    /// We use a mixed bitset to avoid paying too high a memory footprint.
+    type Domain = MixedBitSet<InitIndex>;
 
     const NAME: &'static str = "ever_init";
 
     fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain {
         // bottom = no initialized variables by default
-        ChunkedBitSet::new_empty(self.move_data().inits.len())
+        MixedBitSet::new_empty(self.move_data().inits.len())
    }
 
     fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain) {

compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs

Lines changed: 27 additions & 24 deletions
@@ -7,7 +7,7 @@ use rustc_data_structures::unord::{UnordMap, UnordSet};
 use rustc_errors::Subdiagnostic;
 use rustc_hir::CRATE_HIR_ID;
 use rustc_hir::def_id::{DefId, LocalDefId};
-use rustc_index::bit_set::ChunkedBitSet;
+use rustc_index::bit_set::MixedBitSet;
 use rustc_index::{IndexSlice, IndexVec};
 use rustc_macros::{LintDiagnostic, Subdiagnostic};
 use rustc_middle::bug;
@@ -49,24 +49,24 @@ struct DropsReachable<'a, 'mir, 'tcx> {
     move_data: &'a MoveData<'tcx>,
     maybe_init: &'a mut ResultsCursor<'mir, 'tcx, MaybeInitializedPlaces<'mir, 'tcx>>,
     block_drop_value_info: &'a mut IndexSlice<BasicBlock, MovePathIndexAtBlock>,
-    collected_drops: &'a mut ChunkedBitSet<MovePathIndex>,
-    visited: FxHashMap<BasicBlock, Rc<RefCell<ChunkedBitSet<MovePathIndex>>>>,
+    collected_drops: &'a mut MixedBitSet<MovePathIndex>,
+    visited: FxHashMap<BasicBlock, Rc<RefCell<MixedBitSet<MovePathIndex>>>>,
 }
 
 impl<'a, 'mir, 'tcx> DropsReachable<'a, 'mir, 'tcx> {
     fn visit(&mut self, block: BasicBlock) {
         let move_set_size = self.move_data.move_paths.len();
-        let make_new_path_set = || Rc::new(RefCell::new(ChunkedBitSet::new_empty(move_set_size)));
+        let make_new_path_set = || Rc::new(RefCell::new(MixedBitSet::new_empty(move_set_size)));
 
         let data = &self.body.basic_blocks[block];
         let Some(terminator) = &data.terminator else { return };
-        // Given that we observe these dropped locals here at `block` so far,
-        // we will try to update the successor blocks.
-        // An occupied entry at `block` in `self.visited` signals that we have visited `block` before.
+        // Given that we observe these dropped locals here at `block` so far, we will try to update
+        // the successor blocks. An occupied entry at `block` in `self.visited` signals that we
+        // have visited `block` before.
         let dropped_local_here =
             Rc::clone(self.visited.entry(block).or_insert_with(make_new_path_set));
-        // We could have invoked reverse lookup for a `MovePathIndex` every time, but unfortunately it is expensive.
-        // Let's cache them in `self.block_drop_value_info`.
+        // We could have invoked reverse lookup for a `MovePathIndex` every time, but unfortunately
+        // it is expensive. Let's cache them in `self.block_drop_value_info`.
         match self.block_drop_value_info[block] {
             MovePathIndexAtBlock::Some(dropped) => {
                 dropped_local_here.borrow_mut().insert(dropped);
@@ -76,23 +76,24 @@ impl<'a, 'mir, 'tcx> DropsReachable<'a, 'mir, 'tcx> {
                     && let LookupResult::Exact(idx) | LookupResult::Parent(Some(idx)) =
                         self.move_data.rev_lookup.find(place.as_ref())
                 {
-                    // Since we are working with MIRs at a very early stage,
-                    // observing a `drop` terminator is not indicative enough that
-                    // the drop will definitely happen.
-                    // That is decided in the drop elaboration pass instead.
-                    // Therefore, we need to consult with the maybe-initialization information.
+                    // Since we are working with MIRs at a very early stage, observing a `drop`
+                    // terminator is not indicative enough that the drop will definitely happen.
+                    // That is decided in the drop elaboration pass instead. Therefore, we need to
+                    // consult with the maybe-initialization information.
                     self.maybe_init.seek_before_primary_effect(Location {
                         block,
                         statement_index: data.statements.len(),
                    });
 
-                    // Check if the drop of `place` under inspection is really in effect.
-                    // This is true only when `place` may have been initialized along a control flow path from a BID to the drop program point today.
-                    // In other words, this is where the drop of `place` will happen in the future instead.
+                    // Check if the drop of `place` under inspection is really in effect. This is
+                    // true only when `place` may have been initialized along a control flow path
+                    // from a BID to the drop program point today. In other words, this is where
+                    // the drop of `place` will happen in the future instead.
                     if let MaybeReachable::Reachable(maybe_init) = self.maybe_init.get()
                         && maybe_init.contains(idx)
                     {
-                        // We also cache the drop information, so that we do not need to check on data-flow cursor again
+                        // We also cache the drop information, so that we do not need to check on
+                        // data-flow cursor again.
                         self.block_drop_value_info[block] = MovePathIndexAtBlock::Some(idx);
                         dropped_local_here.borrow_mut().insert(idx);
                     } else {
@@ -139,8 +140,9 @@ impl<'a, 'mir, 'tcx> DropsReachable<'a, 'mir, 'tcx> {
                 // Let's check the observed dropped places in.
                 self.collected_drops.union(&*dropped_local_there.borrow());
                 if self.drop_span.is_none() {
-                    // FIXME(@dingxiangfei2009): it turns out that `self.body.source_scopes` are still a bit wonky.
-                    // There is a high chance that this span still points to a block rather than a statement semicolon.
+                    // FIXME(@dingxiangfei2009): it turns out that `self.body.source_scopes` are
+                    // still a bit wonky. There is a high chance that this span still points to a
+                    // block rather than a statement semicolon.
                     *self.drop_span = Some(terminator.source_info.span);
                 }
                 // Now we have discovered a simple control flow path from a future drop point
@@ -394,10 +396,10 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<
     for (&block, candidates) in &bid_per_block {
         // We will collect drops on locals on paths between BID points to their actual drop locations
         // into `all_locals_dropped`.
-        let mut all_locals_dropped = ChunkedBitSet::new_empty(move_data.move_paths.len());
+        let mut all_locals_dropped = MixedBitSet::new_empty(move_data.move_paths.len());
         let mut drop_span = None;
         for &(_, place) in candidates.iter() {
-            let mut collected_drops = ChunkedBitSet::new_empty(move_data.move_paths.len());
+            let mut collected_drops = MixedBitSet::new_empty(move_data.move_paths.len());
             // ## On detecting change in relative drop order ##
             // Iterate through each BID-containing block `block`.
            // If the place `P` targeted by the BID is "maybe initialized",
@@ -425,8 +427,9 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<
 
         // We shall now exclude some local bindings for the following cases.
        {
-            let mut to_exclude = ChunkedBitSet::new_empty(all_locals_dropped.domain_size());
-            // We will now do subtraction from the candidate dropped locals, because of the following reasons.
+            let mut to_exclude = MixedBitSet::new_empty(all_locals_dropped.domain_size());
+            // We will now do subtraction from the candidate dropped locals, because of the
+            // following reasons.
             for path_idx in all_locals_dropped.iter() {
                 let move_path = &move_data.move_paths[path_idx];
                 let dropped_local = move_path.place.local;

compiler/rustc_mir_transform/src/remove_uninit_drops.rs

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 use rustc_abi::FieldIdx;
-use rustc_index::bit_set::ChunkedBitSet;
+use rustc_index::bit_set::MixedBitSet;
 use rustc_middle::mir::{Body, TerminatorKind};
 use rustc_middle::ty::{self, GenericArgsRef, Ty, TyCtxt, VariantDef};
 use rustc_mir_dataflow::impls::MaybeInitializedPlaces;
@@ -67,7 +67,7 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveUninitDrops {
 fn is_needs_drop_and_init<'tcx>(
     tcx: TyCtxt<'tcx>,
     typing_env: ty::TypingEnv<'tcx>,
-    maybe_inits: &ChunkedBitSet<MovePathIndex>,
+    maybe_inits: &MixedBitSet<MovePathIndex>,
     move_data: &MoveData<'tcx>,
     ty: Ty<'tcx>,
     mpi: MovePathIndex,
