Skip to content

Commit 831397c

Browse files
committed
refactor (#287)
Remove VecDeque in favor of two vecs which will keep their indices stable no matter what, while making much clearer what's happening. It's surprising I wasn't able to do this refactor the first time around and shows how hard it was to understand (for me) :D.
1 parent 84ade1d commit 831397c

File tree

7 files changed

+74
-79
lines changed

7 files changed

+74
-79
lines changed

git-odb/src/store_impls/loose/verify.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ impl Store {
5252

5353
let mut num_objects = 0;
5454
let start = Instant::now();
55-
let mut progress = progress.add_child("validating");
55+
let mut progress = progress.add_child("Validating");
5656
progress.init(None, git_features::progress::count("objects"));
5757
for id in self.iter().filter_map(Result::ok) {
5858
let object = self

git-pack/src/cache/delta/iter.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,12 @@ use crate::cache::delta::{Item, Tree};
44
impl<T> Tree<T> {
55
/// Return an iterator over chunks of roots. Roots are not children themselves, they have no parents.
66
pub fn iter_root_chunks(&mut self, chunk_size: usize) -> impl Iterator<Item = Chunk<'_, T>> + '_ {
7-
let (front, back) = self.items.as_mut_slices();
8-
assert_eq!(
9-
front.len(),
10-
self.roots,
11-
"This should not happen unless we added more nodes than were declared in the constructor"
12-
);
7+
let roots = self.root_items.as_mut_slice();
8+
let children = self.child_items.as_mut_slice();
139

14-
front.chunks_mut(chunk_size).map(move |c| Chunk {
10+
roots.chunks_mut(chunk_size).map(move |c| Chunk {
1511
inner: c.iter_mut(),
16-
children: back as *mut [Item<T>],
12+
children: children as *mut [Item<T>],
1713
})
1814
}
1915
}

git-pack/src/cache/delta/mod.rs

+47-50
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::collections::VecDeque;
2-
31
/// Returned when using various methods on a [`Tree`]
42
#[derive(thiserror::Error, Debug)]
53
#[allow(missing_docs)]
@@ -32,15 +30,23 @@ pub struct Item<T> {
3230
/// Indices into our Tree's `items`, one for each pack entry that depends on us.
3331
children: Vec<usize>,
3432
}
33+
34+
/// Identify what kind of node we have last seen
35+
enum NodeKind {
36+
Root,
37+
Child,
38+
}
39+
3540
/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
3641
/// while being shareable among threads without a lock.
3742
/// It does this by making the guarantee that iteration only happens once.
3843
pub struct Tree<T> {
39-
/// Roots are first, then children.
40-
items: VecDeque<Item<T>>,
41-
roots: usize,
42-
/// The last child index into the `items` array
43-
last_index: usize,
44+
/// The root nodes, i.e. base objects
45+
root_items: Vec<Item<T>>,
46+
/// The child nodes, i.e. those that rely a base object, like ref and ofs delta objets
47+
child_items: Vec<Item<T>>,
48+
/// The last encountered node was either a root or a child.
49+
last_seen: Option<NodeKind>,
4450
/// Future child offsets, associating their offset into the pack with their index in the items array.
4551
/// (parent_offset, child_index)
4652
future_child_offsets: Vec<(crate::data::Offset, usize)>,
@@ -50,22 +56,27 @@ impl<T> Tree<T> {
5056
/// Instantiate a empty tree capable of storing `num_objects` amounts of items.
5157
pub fn with_capacity(num_objects: usize) -> Result<Self, Error> {
5258
Ok(Tree {
53-
items: VecDeque::with_capacity(num_objects),
54-
roots: 0,
55-
last_index: 0,
59+
root_items: Vec::with_capacity(num_objects / 2),
60+
child_items: Vec::with_capacity(num_objects / 2),
61+
last_seen: None,
5662
future_child_offsets: Vec::new(),
5763
})
5864
}
5965

66+
fn num_items(&self) -> usize {
67+
self.root_items.len() + self.child_items.len()
68+
}
69+
6070
fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> {
61-
if self.items.is_empty() {
62-
return Ok(());
63-
}
64-
let item = &mut self.items[self.last_index];
65-
let last_offset = item.offset;
66-
if offset <= last_offset {
71+
let items = match &self.last_seen {
72+
Some(NodeKind::Root) => &mut self.root_items,
73+
Some(NodeKind::Child) => &mut self.child_items,
74+
None => return Ok(()),
75+
};
76+
let item = &mut items.last_mut().expect("last seen won't lie");
77+
if offset <= item.offset {
6778
return Err(Error::InvariantIncreasingPackOffset {
68-
last_pack_offset: last_offset,
79+
last_pack_offset: item.offset,
6980
pack_offset: offset,
7081
});
7182
}
@@ -77,22 +88,12 @@ impl<T> Tree<T> {
7788
&mut self,
7889
pack_entries_end: crate::data::Offset,
7990
) -> Result<(), traverse::Error> {
80-
if self.items.is_empty() {
81-
return Ok(());
82-
};
83-
8491
if !self.future_child_offsets.is_empty() {
85-
let (roots, children) = self.items.as_mut_slices();
86-
assert_eq!(
87-
roots.len(),
88-
self.roots,
89-
"item deque has been resized, maybe we added more nodes than we declared in the constructor?"
90-
);
9192
for (parent_offset, child_index) in self.future_child_offsets.drain(..) {
92-
if let Ok(i) = children.binary_search_by_key(&parent_offset, |i| i.offset) {
93-
children[i].children.push(child_index);
94-
} else if let Ok(i) = roots.binary_search_by(|i| parent_offset.cmp(&i.offset)) {
95-
roots[i].children.push(child_index);
93+
if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) {
94+
self.child_items[i].children.push(child_index);
95+
} else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) {
96+
self.root_items[i].children.push(child_index);
9697
} else {
9798
return Err(traverse::Error::OutOfPackRefDelta {
9899
base_pack_offset: parent_offset,
@@ -101,22 +102,22 @@ impl<T> Tree<T> {
101102
}
102103
}
103104

104-
self.items[self.last_index].next_offset = pack_entries_end;
105+
self.assert_is_incrementing_and_update_next_offset(pack_entries_end)
106+
.expect("BUG: pack now is smaller than all previously seen entries");
105107
Ok(())
106108
}
107109

108110
/// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
109111
/// custom `data` with it.
110112
pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> {
111113
self.assert_is_incrementing_and_update_next_offset(offset)?;
112-
self.last_index = 0;
113-
self.items.push_front(Item {
114+
self.last_seen = NodeKind::Root.into();
115+
self.root_items.push(Item {
114116
offset,
115117
next_offset: 0,
116118
data,
117119
children: Vec::new(),
118120
});
119-
self.roots += 1;
120121
Ok(())
121122
}
122123

@@ -128,22 +129,18 @@ impl<T> Tree<T> {
128129
data: T,
129130
) -> Result<(), Error> {
130131
self.assert_is_incrementing_and_update_next_offset(offset)?;
131-
let (roots, children) = self.items.as_mut_slices();
132-
assert_eq!(
133-
roots.len(),
134-
self.roots,
135-
"item deque has been resized, maybe we added more nodes than we declared in the constructor?"
136-
);
137-
let next_child_index = children.len();
138-
if let Ok(i) = children.binary_search_by_key(&base_offset, |i| i.offset) {
139-
children[i].children.push(next_child_index);
140-
} else if let Ok(i) = roots.binary_search_by(|i| base_offset.cmp(&i.offset)) {
141-
roots[i].children.push(next_child_index);
132+
133+
let next_child_index = self.child_items.len();
134+
if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
135+
self.child_items[i].children.push(next_child_index);
136+
} else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
137+
self.root_items[i].children.push(next_child_index);
142138
} else {
143139
self.future_child_offsets.push((base_offset, next_child_index));
144140
}
145-
self.last_index = self.items.len();
146-
self.items.push_back(Item {
141+
142+
self.last_seen = NodeKind::Child.into();
143+
self.child_items.push(Item {
147144
offset,
148145
next_offset: 0,
149146
data,
@@ -153,8 +150,8 @@ impl<T> Tree<T> {
153150
}
154151

155152
/// Transform this `Tree` into its items.
156-
pub fn into_items(self) -> VecDeque<Item<T>> {
157-
self.items
153+
pub fn into_items(self) -> (Vec<Item<T>>, Vec<Item<T>>) {
154+
(self.root_items, self.child_items)
158155
}
159156
}
160157

git-pack/src/cache/delta/traverse/mod.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
use std::{
2-
collections::VecDeque,
3-
sync::atomic::{AtomicBool, Ordering},
4-
};
1+
use std::sync::atomic::{AtomicBool, Ordering};
52

63
use git_features::{
74
parallel,
@@ -107,7 +104,7 @@ where
107104
should_interrupt,
108105
object_hash,
109106
}: Options<'_, P1, P2>,
110-
) -> Result<VecDeque<Item<T>>, Error>
107+
) -> Result<(Vec<Item<T>>, Vec<Item<T>>), Error>
111108
where
112109
F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
113110
P1: Progress,
@@ -119,7 +116,7 @@ where
119116
let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(1, None, thread_limit, None);
120117
let object_progress = OwnShared::new(Mutable::new(object_progress));
121118

122-
let num_objects = self.items.len();
119+
let num_objects = self.num_items();
123120
in_parallel_if(
124121
should_run_in_parallel,
125122
self.iter_root_chunks(chunk_size),

git-pack/src/index/traverse/with_index.rs

+13-10
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
use std::{
2-
collections::VecDeque,
3-
sync::atomic::{AtomicBool, Ordering},
4-
};
1+
use std::sync::atomic::{AtomicBool, Ordering};
52

63
use git_features::{parallel, progress::Progress};
74

@@ -163,10 +160,15 @@ impl From<crate::index::Entry> for Entry {
163160
}
164161
}
165162

166-
fn digest_statistics(items: VecDeque<crate::cache::delta::Item<Entry>>) -> index::traverse::Statistics {
163+
fn digest_statistics(
164+
(roots, children): (
165+
Vec<crate::cache::delta::Item<Entry>>,
166+
Vec<crate::cache::delta::Item<Entry>>,
167+
),
168+
) -> index::traverse::Statistics {
167169
let mut res = index::traverse::Statistics::default();
168170
let average = &mut res.average;
169-
for item in &items {
171+
for item in roots.iter().chain(children.iter()) {
170172
res.total_compressed_entries_size += item.data.compressed_size;
171173
res.total_decompressed_entries_size += item.data.decompressed_size;
172174
res.total_object_size += item.data.object_size;
@@ -185,10 +187,11 @@ fn digest_statistics(items: VecDeque<crate::cache::delta::Item<Entry>>) -> index
185187
};
186188
}
187189

188-
average.decompressed_size /= items.len() as u64;
189-
average.compressed_size /= items.len();
190-
average.object_size /= items.len() as u64;
191-
average.num_deltas /= items.len() as u32;
190+
let num_nodes = roots.len() + children.len();
191+
average.decompressed_size /= num_nodes as u64;
192+
average.compressed_size /= num_nodes;
193+
average.object_size /= num_nodes as u64;
194+
average.num_deltas /= num_nodes as u32;
192195

193196
res
194197
}

git-pack/src/index/write/encode.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::{cmp::Ordering, collections::VecDeque, io};
1+
use std::{cmp::Ordering, io};
22

33
pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
44
pub(crate) const HIGH_BIT: u32 = 0x8000_0000;
@@ -13,7 +13,7 @@ use crate::index::{util::Count, V2_SIGNATURE};
1313

1414
pub(crate) fn write_to(
1515
out: impl io::Write,
16-
entries_sorted_by_oid: VecDeque<crate::cache::delta::Item<crate::index::write::TreeEntry>>,
16+
entries_sorted_by_oid: Vec<crate::cache::delta::Item<crate::index::write::TreeEntry>>,
1717
pack_hash: &git_hash::ObjectId,
1818
kind: crate::index::Version,
1919
mut progress: impl Progress,

git-pack/src/index/write/mod.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ impl crate::index::File {
156156
let resolver = make_resolver()?;
157157
let sorted_pack_offsets_by_oid = {
158158
let in_parallel_if_pack_is_big_enough = || bytes_to_process > 5_000_000;
159-
let mut items = tree.traverse(
159+
let (roots, children) = tree.traverse(
160160
in_parallel_if_pack_is_big_enough,
161161
resolver,
162162
pack_entries_end,
@@ -181,9 +181,11 @@ impl crate::index::File {
181181
)?;
182182
root_progress.inc();
183183

184+
let mut items = roots;
185+
items.extend(children);
184186
{
185187
let _progress = root_progress.add_child("sorting by id");
186-
items.make_contiguous().sort_by_key(|e| e.data.id);
188+
items.sort_by_key(|e| e.data.id);
187189
}
188190

189191
root_progress.inc();

0 commit comments

Comments
 (0)