
Commit 02a4703

use preds to serialize just what we need
This massively speeds up serialization. It also seems to produce deterministic metadata hashes (before I was seeing inconsistent results). Fixes #35232.
1 parent 9978cbc commit 02a4703

File tree

16 files changed: +178 -215 lines changed
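The reduction the commit message alludes to is spelled out by the new doc comment on `SerializedEdge` below: serialize only edges `S -> T` whose source `S` is hashable (a HIR node or foreign metadata) and whose target `T` is significant (for example a work-product). What follows is a minimal sketch of such a predecessor-based reduction over a toy graph, assuming plain strings for nodes and hypothetical `is_hashable`/`is_significant` predicates; the compiler's real pred-set computation differs in detail.

```rust
use std::collections::{HashMap, HashSet};

/// Toy stand-in for the compiler's `DepNode<DefPathIndex>`.
type Node = &'static str;

/// Keep only edges `S -> T` where `S` is a (transitive) hashable
/// predecessor of a significant target `T`.
fn reduced_edges(
    edges: &[(Node, Node)],
    is_hashable: impl Fn(Node) -> bool,
    is_significant: impl Fn(Node) -> bool,
) -> Vec<(Node, Node)> {
    // Index incoming edges so we can walk predecessors.
    let mut preds: HashMap<Node, Vec<Node>> = HashMap::new();
    for &(source, target) in edges {
        preds.entry(target).or_default().push(source);
    }

    let mut result = Vec::new();
    for &(_, target) in edges {
        if !is_significant(target) {
            continue;
        }
        // Depth-first walk over the transitive predecessors of `target`.
        let mut seen = HashSet::new();
        let mut stack = vec![target];
        while let Some(node) = stack.pop() {
            for &pred in preds.get(node).into_iter().flatten() {
                if seen.insert(pred) {
                    if is_hashable(pred) {
                        result.push((pred, target));
                    }
                    stack.push(pred);
                }
            }
        }
    }
    // The same target can head several edges; drop duplicate pairs.
    result.sort();
    result.dedup();
    result
}

fn main() {
    // Hir(a) -> Typeck(a) -> WP(w): only Hir(a) -> WP(w) survives,
    // skipping the intermediate (unhashable, insignificant) node.
    let edges = [("Hir(a)", "Typeck(a)"), ("Typeck(a)", "WP(w)")];
    let reduced = reduced_edges(&edges,
                                |n| n.starts_with("Hir"),
                                |n| n.starts_with("WP"));
    assert_eq!(reduced, vec![("Hir(a)", "WP(w)")]);
}
```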

src/librustc_incremental/persist/data.rs

+11-5
@@ -19,7 +19,6 @@ use super::directory::DefPathIndex;
 /// Data for use when recompiling the **current crate**.
 #[derive(Debug, RustcEncodable, RustcDecodable)]
 pub struct SerializedDepGraph {
-    pub nodes: Vec<DepNode<DefPathIndex>>,
     pub edges: Vec<SerializedEdge>,

     /// These are hashes of two things:
@@ -44,15 +43,22 @@ pub struct SerializedDepGraph {
     pub hashes: Vec<SerializedHash>,
 }

+/// Represents a "reduced" dependency edge. Unlike the full dep-graph,
+/// the dep-graph we serialize contains only edges `S -> T` where the
+/// source `S` is something hashable (a HIR node or foreign metadata)
+/// and the target `T` is something significant, like a work-product.
+/// Normally, significant nodes are only those that have saved data on
+/// disk, but in unit-testing the set of significant nodes can be
+/// increased.
 pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>);

 #[derive(Debug, RustcEncodable, RustcDecodable)]
 pub struct SerializedHash {
-    /// node being hashed; either a Hir or MetaData variant, in
-    /// practice
-    pub node: DepNode<DefPathIndex>,
+    /// def-id of thing being hashed
+    pub dep_node: DepNode<DefPathIndex>,

-    /// the hash itself, computed by `calculate_item_hash`
+    /// the hash as of previous compilation, computed by code in
+    /// `hash` module
     pub hash: u64,
 }
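One consequence visible above is that `SerializedDepGraph` loses its `nodes` field: with only reduced edges stored, the graph to save boils down to the edge list plus the previous hashes of the hashable nodes. A hedged sketch of assembling such a structure, with `u32` standing in for `DepNode<DefPathIndex>` and a hypothetical `hash_of` closure (the assumption that the hashed nodes are exactly the edge sources is mine, not the commit's):

```rust
/// Simplified mirror of `SerializedDepGraph` above, with `u32` in
/// place of `DepNode<DefPathIndex>`; illustrative only.
#[derive(Debug)]
struct ToySerializedDepGraph {
    /// Reduced edges only: hashable source -> significant target.
    edges: Vec<(u32, u32)>,
    /// Hash each source had in the previous compilation.
    hashes: Vec<(u32, u64)>,
}

/// Record one hash per distinct edge source; a separate `nodes`
/// vector is then redundant (assumption: this mirrors why the
/// field was dropped).
fn to_save(edges: Vec<(u32, u32)>, hash_of: impl Fn(u32) -> u64) -> ToySerializedDepGraph {
    let mut sources: Vec<u32> = edges.iter().map(|&(s, _)| s).collect();
    sources.sort();
    sources.dedup();
    let hashes = sources.into_iter().map(|s| (s, hash_of(s))).collect();
    ToySerializedDepGraph { edges, hashes }
}

fn main() {
    let g = to_save(vec![(1, 10), (2, 10)], |s| u64::from(s) * 31);
    println!("{:?}", g); // edges: [(1, 10), (2, 10)], hashes: [(1, 31), (2, 62)]
}
```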

src/librustc_incremental/persist/load.rs

+69-120
@@ -28,7 +28,7 @@ use super::dirty_clean;
 use super::hash::*;
 use super::util::*;

-type DirtyNodes = FnvHashSet<DepNode<DefId>>;
+type DirtyNodes = FnvHashSet<DepNode<DefPathIndex>>;

 type CleanEdges = Vec<(DepNode<DefId>, DepNode<DefId>)>;

@@ -110,157 +110,106 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
     // Retrace the paths in the directory to find their current location (if any).
     let retraced = directory.retrace(tcx);

-    // Compute the set of Hir nodes whose data has changed.
-    let mut dirty_nodes =
-        initial_dirty_nodes(tcx, &serialized_dep_graph.hashes, &retraced);
-
-    debug!("decode_dep_graph: initial dirty_nodes = {:#?}", dirty_nodes);
+    // TODO -- this could be more efficient if we integrated the `DefIdDirectory` and
+    // pred set more deeply
+
+    // Compute the set of Hir nodes whose data has changed or which have been removed.
+    let dirty_raw_source_nodes = dirty_nodes(tcx, &serialized_dep_graph.hashes, &retraced);
+
+    // Create a (maybe smaller) list of
+    let retraced_edges: Vec<_> =
+        serialized_dep_graph.edges.iter()
+                                  .filter_map(|&(ref raw_source_node, ref raw_target_node)| {
+                                      retraced.map(raw_target_node)
+                                              .map(|target_node| (raw_source_node, target_node))
+                                  })
+                                  .collect();
+
+    // Compute which work-products have changed.
+    let mut dirty_target_nodes = FnvHashSet();
+    for &(raw_source_node, ref target_node) in &retraced_edges {
+        if dirty_raw_source_nodes.contains(raw_source_node) {
+            if !dirty_target_nodes.contains(target_node) {
+                dirty_target_nodes.insert(target_node.clone());
+
+                if tcx.sess.opts.debugging_opts.incremental_info {
+                    // It'd be nice to pretty-print these paths better than just
+                    // using the `Debug` impls, but wev.
+                    println!("module {:?} is dirty because {:?} changed or was removed",
+                             target_node,
+                             raw_source_node.map_def(|&index| {
+                                 Some(directory.def_path_string(tcx, index))
+                             }).unwrap());
+                }
+            }
+        }
+    }

-    // Find all DepNodes reachable from that core set. This loop
-    // iterates repeatedly over the list of edges whose source is not
-    // known to be dirty (`clean_edges`). If it finds an edge whose
-    // source is dirty, it removes it from that list and adds the
-    // target to `dirty_nodes`. It stops when it reaches a fixed
-    // point.
-    let clean_edges = compute_clean_edges(tcx,
-                                          &directory,
-                                          &serialized_dep_graph.edges,
-                                          &retraced,
-                                          &mut dirty_nodes);
+    // For work-products that are still clean, add their deps into the
+    // graph. This is needed because later we will have to save this
+    // back out again!
+    let dep_graph = tcx.dep_graph.clone();
+    for (raw_source_node, target_node) in retraced_edges {
+        if dirty_target_nodes.contains(&target_node) {
+            continue;
+        }

-    // Add synthetic `foo->foo` edges for each clean node `foo` that
-    // we had before. This is sort of a hack to create clean nodes in
-    // the graph, since the existence of a node is a signal that the
-    // work it represents need not be repeated.
-    let clean_nodes =
-        serialized_dep_graph.nodes
-                            .iter()
-                            .filter_map(|node| retraced.map(node))
-                            .filter(|node| !dirty_nodes.contains(node))
-                            .map(|node| (node.clone(), node));
+        let source_node = retraced.map(raw_source_node).unwrap();

-    // Add nodes and edges that are not dirty into our main graph.
-    let dep_graph = tcx.dep_graph.clone();
-    for (source, target) in clean_edges.into_iter().chain(clean_nodes) {
-        debug!("decode_dep_graph: clean edge: {:?} -> {:?}", source, target);
+        debug!("decode_dep_graph: clean edge: {:?} -> {:?}", source_node, target_node);

-        let _task = dep_graph.in_task(target);
-        dep_graph.read(source);
+        let _task = dep_graph.in_task(target_node);
+        dep_graph.read(source_node);
     }

     // Add in work-products that are still clean, and delete those that are
     // dirty.
     let mut work_product_decoder = Decoder::new(work_products_data, 0);
     let work_products = try!(<Vec<SerializedWorkProduct>>::decode(&mut work_product_decoder));
-    reconcile_work_products(tcx, work_products, &dirty_nodes);
+    reconcile_work_products(tcx, work_products, &dirty_target_nodes);

     Ok(())
 }

-fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
-                                 hashes: &[SerializedHash],
-                                 retraced: &RetracedDefIdDirectory)
-                                 -> DirtyNodes {
+/// Computes which of the original set of def-ids are dirty. Stored in
+/// a bit vector where the index is the DefPathIndex.
+fn dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
+                         hashes: &[SerializedHash],
+                         retraced: &RetracedDefIdDirectory)
+                         -> DirtyNodes {
     let mut hcx = HashContext::new(tcx);
-    let mut items_removed = false;
     let mut dirty_nodes = FnvHashSet();
-    for hash in hashes {
-        match hash.node.map_def(|&i| retraced.def_id(i)) {
-            Some(dep_node) => {
-                let (_, current_hash) = hcx.hash(&dep_node).unwrap();
-                if current_hash != hash.hash {
-                    debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}",
-                           dep_node.map_def(|&def_id| Some(tcx.def_path(def_id))).unwrap(),
-                           current_hash,
-                           hash.hash);
-                    dirty_nodes.insert(dep_node);
-                }
-            }
-            None => {
-                items_removed = true;
-            }
-        }
-    }

-    // If any of the items in the krate have changed, then we consider
-    // the meta-node `Krate` to be dirty, since that means something
-    // which (potentially) read the contents of every single item.
-    if items_removed || !dirty_nodes.is_empty() {
-        dirty_nodes.insert(DepNode::Krate);
-    }
-
-    dirty_nodes
-}
-
-fn compute_clean_edges(tcx: TyCtxt,
-                       directory: &DefIdDirectory,
-                       serialized_edges: &[(SerializedEdge)],
-                       retraced: &RetracedDefIdDirectory,
-                       dirty_nodes: &mut DirtyNodes)
-                       -> CleanEdges {
-    // Build up an initial list of edges. Include an edge (source,
-    // target) if neither node has been removed. If the source has
-    // been removed, add target to the list of dirty nodes.
-    let mut clean_edges = Vec::with_capacity(serialized_edges.len());
-    for &(ref serialized_source, ref serialized_target) in serialized_edges {
-        if let Some(target) = retraced.map(serialized_target) {
-            if let Some(source) = retraced.map(serialized_source) {
-                clean_edges.push((source, target))
-            } else {
-                // source removed, target must be dirty
-                debug!("compute_clean_edges: {:?} dirty because {:?} no longer exists",
-                       target,
-                       serialized_source.map_def(|&index| {
-                           Some(directory.def_path_string(tcx, index))
-                       }).unwrap());
-
-                dirty_nodes.insert(target);
+    for hash in hashes {
+        if let Some(dep_node) = retraced.map(&hash.dep_node) {
+            let (_, current_hash) = hcx.hash(&dep_node).unwrap();
+            if current_hash == hash.hash {
+                continue;
             }
+            debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}",
+                   dep_node.map_def(|&def_id| Some(tcx.def_path(def_id))).unwrap(),
+                   current_hash,
+                   hash.hash);
         } else {
-            // target removed, ignore the edge
+            debug!("initial_dirty_nodes: {:?} is dirty as it was removed",
+                   hash.dep_node);
         }
-    }

-    debug!("compute_clean_edges: dirty_nodes={:#?}", dirty_nodes);
-
-    // Propagate dirty marks by iterating repeatedly over
-    // `clean_edges`. If we find an edge `(source, target)` where
-    // `source` is dirty, add `target` to the list of dirty nodes and
-    // remove it. Keep doing this until we find no more dirty nodes.
-    let mut previous_size = 0;
-    while dirty_nodes.len() > previous_size {
-        debug!("compute_clean_edges: previous_size={}", previous_size);
-        previous_size = dirty_nodes.len();
-        let mut i = 0;
-        while i < clean_edges.len() {
-            if dirty_nodes.contains(&clean_edges[i].0) {
-                let (source, target) = clean_edges.swap_remove(i);
-                debug!("compute_clean_edges: dirty source {:?} -> {:?}",
-                       source, target);
-                dirty_nodes.insert(target);
-            } else if dirty_nodes.contains(&clean_edges[i].1) {
-                let (source, target) = clean_edges.swap_remove(i);
-                debug!("compute_clean_edges: dirty target {:?} -> {:?}",
-                       source, target);
-            } else {
-                i += 1;
-            }
-        }
+        dirty_nodes.insert(hash.dep_node.clone());
     }

-    clean_edges
+    dirty_nodes
 }

 /// Go through the list of work-products produced in the previous run.
 /// Delete any whose nodes have been found to be dirty or which are
 /// otherwise no longer applicable.
 fn reconcile_work_products<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                                      work_products: Vec<SerializedWorkProduct>,
-                                     dirty_nodes: &DirtyNodes) {
+                                     dirty_target_nodes: &FnvHashSet<DepNode<DefId>>) {
     debug!("reconcile_work_products({:?})", work_products);
     for swp in work_products {
-        let dep_node = DepNode::WorkProduct(swp.id.clone());
-        if dirty_nodes.contains(&dep_node) {
+        if dirty_target_nodes.contains(&DepNode::WorkProduct(swp.id.clone())) {
             debug!("reconcile_work_products: dep-node for {:?} is dirty", swp);
             delete_dirty_work_product(tcx, swp);
         } else {
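Taken together, the load.rs changes above turn dirty tracking into a single pass, with no fixed-point loop like the deleted `compute_clean_edges`: a source is dirty if it was removed or its recomputed hash differs from the saved one, and a target (work-product) is dirty as soon as any of its edge sources is. A small sketch of that pass under toy assumptions (`u32` for both `DefPathIndex` and `DefId`, hypothetical `retrace` and `current_hash` closures):

```rust
use std::collections::HashSet;

/// One-pass dirty computation mirroring the rewritten load logic:
/// recompute each saved hash, mark removed-or-changed sources dirty,
/// then mark every target of an edge whose source is dirty.
fn dirty_targets(
    edges: &[(u32, u32)],
    saved_hashes: &[(u32, u64)],
    retrace: impl Fn(u32) -> Option<u32>, // DefPathIndex -> DefId; None if removed
    current_hash: impl Fn(u32) -> u64,
) -> HashSet<u32> {
    let dirty_sources: HashSet<u32> = saved_hashes
        .iter()
        .filter(|&&(node, hash)| match retrace(node) {
            Some(def_id) => current_hash(def_id) != hash, // hash changed
            None => true,                                 // removed
        })
        .map(|&(node, _)| node)
        .collect();

    // Reduced edges run straight from hashable sources to significant
    // targets, so one scan over the edge list suffices.
    edges.iter()
         .filter(|&&(source, _)| dirty_sources.contains(&source))
         .map(|&(_, target)| target)
         .collect()
}

fn main() {
    let edges = [(1, 10), (2, 20)];
    let saved = [(1, 31), (2, 999)]; // node 2's saved hash will not match
    let dirty = dirty_targets(&edges, &saved, |n| Some(n), |n| u64::from(n) * 31);
    assert!(dirty.contains(&20) && !dirty.contains(&10));
}
```

Clean targets then have their edges replayed into the new graph (the `in_task`/`read` calls above), and dirty work-products are deleted by `reconcile_work_products`.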
