From 76d92142e251888f096adc6700189967e4d1cafd Mon Sep 17 00:00:00 2001
From: lcnr <rust@lcnr.de>
Date: Mon, 13 May 2024 15:08:25 +0000
Subject: [PATCH 1/6] move fixpoint step into subfunction

---
 .../src/solve/search_graph.rs                 | 129 +++++++++++-------
 1 file changed, 76 insertions(+), 53 deletions(-)

diff --git a/compiler/rustc_trait_selection/src/solve/search_graph.rs b/compiler/rustc_trait_selection/src/solve/search_graph.rs
index a48b2f2478b0d..0992a47224bbb 100644
--- a/compiler/rustc_trait_selection/src/solve/search_graph.rs
+++ b/compiler/rustc_trait_selection/src/solve/search_graph.rs
@@ -379,60 +379,10 @@ impl<'tcx> SearchGraph<'tcx> {
         // `with_anon_task` closure.
         let ((final_entry, result), dep_node) =
             tcx.dep_graph.with_anon_task(tcx, dep_kinds::TraitSelect, || {
-                // When we encounter a coinductive cycle, we have to fetch the
-                // result of that cycle while we are still computing it. Because
-                // of this we continuously recompute the cycle until the result
-                // of the previous iteration is equal to the final result, at which
-                // point we are done.
                 for _ in 0..FIXPOINT_STEP_LIMIT {
-                    let result = prove_goal(self, inspect);
-                    let stack_entry = self.pop_stack();
-                    debug_assert_eq!(stack_entry.input, input);
-
-                    // If the current goal is not the root of a cycle, we are done.
-                    if stack_entry.has_been_used.is_empty() {
-                        return (stack_entry, result);
-                    }
-
-                    // If it is a cycle head, we have to keep trying to prove it until
-                    // we reach a fixpoint. We need to do so for all cycle heads,
-                    // not only for the root.
-                    //
-                    // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
-                    // for an example.
-
-                    // Start by clearing all provisional cache entries which depend on this
-                    // the current goal.
-                    Self::clear_dependent_provisional_results(
-                        &mut self.provisional_cache,
-                        self.stack.next_index(),
-                    );
-
-                    // Check whether we reached a fixpoint, either because the final result
-                    // is equal to the provisional result of the previous iteration, or because
-                    // this was only the root of either coinductive or inductive cycles, and the
-                    // final result is equal to the initial response for that case.
-                    let reached_fixpoint = if let Some(r) = stack_entry.provisional_result {
-                        r == result
-                    } else if stack_entry.has_been_used == HasBeenUsed::COINDUCTIVE_CYCLE {
-                        Self::response_no_constraints(tcx, input, Certainty::Yes) == result
-                    } else if stack_entry.has_been_used == HasBeenUsed::INDUCTIVE_CYCLE {
-                        Self::response_no_constraints(tcx, input, Certainty::overflow(false))
-                            == result
-                    } else {
-                        false
-                    };
-
-                    // If we did not reach a fixpoint, update the provisional result and reevaluate.
-                    if reached_fixpoint {
-                        return (stack_entry, result);
-                    } else {
-                        let depth = self.stack.push(StackEntry {
-                            has_been_used: HasBeenUsed::empty(),
-                            provisional_result: Some(result),
-                            ..stack_entry
-                        });
-                        debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
+                    match self.fixpoint_step_in_task(tcx, input, inspect, &mut prove_goal) {
+                        StepResult::Done(final_entry, result) => return (final_entry, result),
+                        StepResult::HasChanged => {}
                     }
                 }
 
@@ -484,6 +434,79 @@ impl<'tcx> SearchGraph<'tcx> {
 
         result
     }
+}
+
+enum StepResult<'tcx> {
+    Done(StackEntry<'tcx>, QueryResult<'tcx>),
+    HasChanged,
+}
+
+impl<'tcx> SearchGraph<'tcx> {
+    /// When we encounter a coinductive cycle, we have to fetch the
+    /// result of that cycle while we are still computing it. Because
+    /// of this we continuously recompute the cycle until the result
+    /// of the previous iteration is equal to the final result, at which
+    /// point we are done.
+    fn fixpoint_step_in_task<F>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        input: CanonicalInput<'tcx>,
+        inspect: &mut ProofTreeBuilder<'tcx>,
+        prove_goal: &mut F,
+    ) -> StepResult<'tcx>
+    where
+        F: FnMut(&mut Self, &mut ProofTreeBuilder<'tcx>) -> QueryResult<'tcx>,
+    {
+        let result = prove_goal(self, inspect);
+        let stack_entry = self.pop_stack();
+        debug_assert_eq!(stack_entry.input, input);
+
+        // If the current goal is not the root of a cycle, we are done.
+        if stack_entry.has_been_used.is_empty() {
+            return StepResult::Done(stack_entry, result);
+        }
+
+        // If it is a cycle head, we have to keep trying to prove it until
+        // we reach a fixpoint. We need to do so for all cycle heads,
+        // not only for the root.
+        //
+        // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
+        // for an example.
+
+        // Start by clearing all provisional cache entries which depend on
+        // the current goal.
+        Self::clear_dependent_provisional_results(
+            &mut self.provisional_cache,
+            self.stack.next_index(),
+        );
+
+        // Check whether we reached a fixpoint, either because the final result
+        // is equal to the provisional result of the previous iteration, or because
+        // this was only the root of either coinductive or inductive cycles, and the
+        // final result is equal to the initial response for that case.
+        let reached_fixpoint = if let Some(r) = stack_entry.provisional_result {
+            r == result
+        } else if stack_entry.has_been_used == HasBeenUsed::COINDUCTIVE_CYCLE {
+            Self::response_no_constraints(tcx, input, Certainty::Yes) == result
+        } else if stack_entry.has_been_used == HasBeenUsed::INDUCTIVE_CYCLE {
+            Self::response_no_constraints(tcx, input, Certainty::overflow(false)) == result
+        } else {
+            false
+        };
+
+        // If we did not reach a fixpoint, update the provisional result and reevaluate.
+        if reached_fixpoint {
+            StepResult::Done(stack_entry, result)
+        } else {
+            let depth = self.stack.push(StackEntry {
+                has_been_used: HasBeenUsed::empty(),
+                provisional_result: Some(result),
+                ..stack_entry
+            });
+            debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
+            StepResult::HasChanged
+        }
+    }
 
     fn response_no_constraints(
         tcx: TyCtxt<'tcx>,

From c0b0295638ce31fd796fe7bea94f7cdd9a94ae93 Mon Sep 17 00:00:00 2001
From: lcnr <rust@lcnr.de>
Date: Mon, 13 May 2024 15:50:59 +0000
Subject: [PATCH 2/6] move global cache lookup into fn

---
 .../rustc_middle/src/traits/solve/cache.rs    | 34 ++++----
 .../src/solve/search_graph.rs                 | 85 ++++++++++---------
 src/bootstrap/src/core/builder/tests.rs       | 29 +++++--
 3 files changed, 86 insertions(+), 62 deletions(-)

diff --git a/compiler/rustc_middle/src/traits/solve/cache.rs b/compiler/rustc_middle/src/traits/solve/cache.rs
index 4f90af0a27c07..80c2ee30e39d3 100644
--- a/compiler/rustc_middle/src/traits/solve/cache.rs
+++ b/compiler/rustc_middle/src/traits/solve/cache.rs
@@ -14,11 +14,11 @@ pub struct EvaluationCache<'tcx> {
     map: Lock<FxHashMap<CanonicalInput<'tcx>, CacheEntry<'tcx>>>,
 }
 
-#[derive(PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct CacheData<'tcx> {
     pub result: QueryResult<'tcx>,
     pub proof_tree: Option<&'tcx [inspect::GoalEvaluationStep<'tcx>]>,
-    pub reached_depth: usize,
+    pub additional_depth: usize,
     pub encountered_overflow: bool,
 }
 
@@ -29,7 +29,7 @@ impl<'tcx> EvaluationCache<'tcx> {
         tcx: TyCtxt<'tcx>,
         key: CanonicalInput<'tcx>,
         proof_tree: Option<&'tcx [inspect::GoalEvaluationStep<'tcx>]>,
-        reached_depth: usize,
+        additional_depth: usize,
         encountered_overflow: bool,
         cycle_participants: FxHashSet<CanonicalInput<'tcx>>,
         dep_node: DepNodeIndex,
@@ -40,17 +40,17 @@ impl<'tcx> EvaluationCache<'tcx> {
         let data = WithDepNode::new(dep_node, QueryData { result, proof_tree });
         entry.cycle_participants.extend(cycle_participants);
         if encountered_overflow {
-            entry.with_overflow.insert(reached_depth, data);
+            entry.with_overflow.insert(additional_depth, data);
         } else {
-            entry.success = Some(Success { data, reached_depth });
+            entry.success = Some(Success { data, additional_depth });
         }
 
         if cfg!(debug_assertions) {
             drop(map);
-            if Some(CacheData { result, proof_tree, reached_depth, encountered_overflow })
-                != self.get(tcx, key, |_| false, Limit(reached_depth))
-            {
-                bug!("unable to retrieve inserted element from cache: {key:?}");
+            let expected = CacheData { result, proof_tree, additional_depth, encountered_overflow };
+            let actual = self.get(tcx, key, [], Limit(additional_depth));
+            if !actual.as_ref().is_some_and(|actual| expected == *actual) {
+                bug!("failed to lookup inserted element for {key:?}: {expected:?} != {actual:?}");
             }
         }
     }
@@ -63,23 +63,25 @@ impl<'tcx> EvaluationCache<'tcx> {
         &self,
         tcx: TyCtxt<'tcx>,
         key: CanonicalInput<'tcx>,
-        cycle_participant_in_stack: impl FnOnce(&FxHashSet<CanonicalInput<'tcx>>) -> bool,
+        stack_entries: impl IntoIterator<Item = CanonicalInput<'tcx>>,
         available_depth: Limit,
     ) -> Option<CacheData<'tcx>> {
         let map = self.map.borrow();
         let entry = map.get(&key)?;
 
-        if cycle_participant_in_stack(&entry.cycle_participants) {
-            return None;
+        for stack_entry in stack_entries {
+            if entry.cycle_participants.contains(&stack_entry) {
+                return None;
+            }
         }
 
         if let Some(ref success) = entry.success {
-            if available_depth.value_within_limit(success.reached_depth) {
+            if available_depth.value_within_limit(success.additional_depth) {
                 let QueryData { result, proof_tree } = success.data.get(tcx);
                 return Some(CacheData {
                     result,
                     proof_tree,
-                    reached_depth: success.reached_depth,
+                    additional_depth: success.additional_depth,
                     encountered_overflow: false,
                 });
             }
@@ -90,7 +92,7 @@ impl<'tcx> EvaluationCache<'tcx> {
             CacheData {
                 result,
                 proof_tree,
-                reached_depth: available_depth.0,
+                additional_depth: available_depth.0,
                 encountered_overflow: true,
             }
         })
@@ -99,7 +101,7 @@ impl<'tcx> EvaluationCache<'tcx> {
 
 struct Success<'tcx> {
     data: WithDepNode<QueryData<'tcx>>,
-    reached_depth: usize,
+    additional_depth: usize,
 }
 
 #[derive(Clone, Copy)]
diff --git a/compiler/rustc_trait_selection/src/solve/search_graph.rs b/compiler/rustc_trait_selection/src/solve/search_graph.rs
index 0992a47224bbb..4824432cf3d3e 100644
--- a/compiler/rustc_trait_selection/src/solve/search_graph.rs
+++ b/compiler/rustc_trait_selection/src/solve/search_graph.rs
@@ -129,16 +129,6 @@ impl<'tcx> SearchGraph<'tcx> {
         self.mode
     }
 
-    /// Update the stack and reached depths on cache hits.
-    #[instrument(level = "debug", skip(self))]
-    fn on_cache_hit(&mut self, additional_depth: usize, encountered_overflow: bool) {
-        let reached_depth = self.stack.next_index().plus(additional_depth);
-        if let Some(last) = self.stack.raw.last_mut() {
-            last.reached_depth = last.reached_depth.max(reached_depth);
-            last.encountered_overflow |= encountered_overflow;
-        }
-    }
-
     /// Pops the highest goal from the stack, lazily updating the
     /// the next goal in the stack.
     ///
@@ -266,36 +256,7 @@ impl<'tcx> SearchGraph<'tcx> {
             return Self::response_no_constraints(tcx, input, Certainty::overflow(true));
         };
 
-        // Try to fetch the goal from the global cache.
-        'global: {
-            let Some(CacheData { result, proof_tree, reached_depth, encountered_overflow }) =
-                self.global_cache(tcx).get(
-                    tcx,
-                    input,
-                    |cycle_participants| {
-                        self.stack.iter().any(|entry| cycle_participants.contains(&entry.input))
-                    },
-                    available_depth,
-                )
-            else {
-                break 'global;
-            };
-
-            // If we're building a proof tree and the current cache entry does not
-            // contain a proof tree, we do not use the entry but instead recompute
-            // the goal. We simply overwrite the existing entry once we're done,
-            // caching the proof tree.
-            if !inspect.is_noop() {
-                if let Some(revisions) = proof_tree {
-                    inspect.goal_evaluation_kind(
-                        inspect::WipCanonicalGoalEvaluationKind::Interned { revisions },
-                    );
-                } else {
-                    break 'global;
-                }
-            }
-
-            self.on_cache_hit(reached_depth, encountered_overflow);
+        if let Some(result) = self.lookup_global_cache(tcx, input, available_depth, inspect) {
             return result;
         }
 
@@ -376,7 +337,10 @@ impl<'tcx> SearchGraph<'tcx> {
 
         // This is for global caching, so we properly track query dependencies.
         // Everything that affects the `result` should be performed within this
-        // `with_anon_task` closure.
+        // `with_anon_task` closure. If computing this goal depends on something
+        // not tracked by the cache key and from outside of this anon task, it
+        // must not be added to the global cache. Notably, this is the case for
+        // trait solver cycle participants.
         let ((final_entry, result), dep_node) =
             tcx.dep_graph.with_anon_task(tcx, dep_kinds::TraitSelect, || {
                 for _ in 0..FIXPOINT_STEP_LIMIT {
@@ -434,6 +398,45 @@ impl<'tcx> SearchGraph<'tcx> {
 
         result
     }
+
+    /// Try to fetch a previously computed result from the global cache,
+    /// making sure to only do so if it would match the result of reevaluating
+    /// this goal.
+    fn lookup_global_cache(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        input: CanonicalInput<'tcx>,
+        available_depth: Limit,
+        inspect: &mut ProofTreeBuilder<'tcx>,
+    ) -> Option<QueryResult<'tcx>> {
+        let CacheData { result, proof_tree, additional_depth, encountered_overflow } = self
+            .global_cache(tcx)
+            .get(tcx, input, self.stack.iter().map(|e| e.input), available_depth)?;
+
+        // If we're building a proof tree and the current cache entry does not
+        // contain a proof tree, we do not use the entry but instead recompute
+        // the goal. We simply overwrite the existing entry once we're done,
+        // caching the proof tree.
+        if !inspect.is_noop() {
+            if let Some(revisions) = proof_tree {
+                let kind = inspect::WipCanonicalGoalEvaluationKind::Interned { revisions };
+                inspect.goal_evaluation_kind(kind);
+            } else {
+                return None;
+            }
+        }
+
+        // Update the reached depth of the current goal to make sure
+        // its state is the same regardless of whether we've used the
+        // global cache or not.
+        let reached_depth = self.stack.next_index().plus(additional_depth);
+        if let Some(last) = self.stack.raw.last_mut() {
+            last.reached_depth = last.reached_depth.max(reached_depth);
+            last.encountered_overflow |= encountered_overflow;
+        }
+
+        Some(result)
+    }
 }
 
 enum StepResult<'tcx> {
diff --git a/src/bootstrap/src/core/builder/tests.rs b/src/bootstrap/src/core/builder/tests.rs
index 9710365ef114d..276fd0b11d64c 100644
--- a/src/bootstrap/src/core/builder/tests.rs
+++ b/src/bootstrap/src/core/builder/tests.rs
@@ -60,7 +60,14 @@ fn check_cli<const N: usize>(paths: [&str; N]) {
 macro_rules! std {
     ($host:ident => $target:ident, stage = $stage:literal) => {
         compile::Std::new(
-            Compiler { host: TargetSelection::from_user(concat!(stringify!($host), "-", stringify!($host))), stage: $stage },
+            Compiler {
+                host: TargetSelection::from_user(concat!(
+                    stringify!($host),
+                    "-",
+                    stringify!($host)
+                )),
+                stage: $stage,
+            },
             TargetSelection::from_user(concat!(stringify!($target), "-", stringify!($target))),
         )
     };
@@ -83,7 +90,14 @@ macro_rules! doc_std {
 macro_rules! rustc {
     ($host:ident => $target:ident, stage = $stage:literal) => {
         compile::Rustc::new(
-            Compiler { host: TargetSelection::from_user(concat!(stringify!($host), "-", stringify!($host))), stage: $stage },
+            Compiler {
+                host: TargetSelection::from_user(concat!(
+                    stringify!($host),
+                    "-",
+                    stringify!($host)
+                )),
+                stage: $stage,
+            },
             TargetSelection::from_user(concat!(stringify!($target), "-", stringify!($target))),
         )
     };
@@ -141,10 +155,14 @@ fn check_missing_paths_for_x_test_tests() {
 
         // Skip if not a test directory.
         if path.ends_with("tests/auxiliary") || !path.is_dir() {
-            continue
+            continue;
         }
 
-        assert!(tests_remap_paths.iter().any(|item| path.ends_with(*item)), "{} is missing in PATH_REMAP tests list.", path.display());
+        assert!(
+            tests_remap_paths.iter().any(|item| path.ends_with(*item)),
+            "{} is missing in PATH_REMAP tests list.",
+            path.display()
+        );
     }
 }
 
@@ -185,7 +203,8 @@ fn alias_and_path_for_library() {
         &[std!(A => A, stage = 0), std!(A => A, stage = 1)]
     );
 
-    let mut cache = run_build(&["library".into(), "core".into()], configure("doc", &["A-A"], &["A-A"]));
+    let mut cache =
+        run_build(&["library".into(), "core".into()], configure("doc", &["A-A"], &["A-A"]));
     assert_eq!(first(cache.all::<doc::Std>()), &[doc_std!(A => A, stage = 0)]);
 }
 

From 158cfeb851550785dbc2b78faa734f6ba4431b90 Mon Sep 17 00:00:00 2001
From: lcnr <rust@lcnr.de>
Date: Wed, 15 May 2024 17:46:54 +0000
Subject: [PATCH 3/6] cache provisional results in fixpoint

---
 .../src/solve/search_graph.rs                 | 600 ++++++++++++++----
 1 file changed, 493 insertions(+), 107 deletions(-)

diff --git a/compiler/rustc_trait_selection/src/solve/search_graph.rs b/compiler/rustc_trait_selection/src/solve/search_graph.rs
index 4824432cf3d3e..8d111f00a5e0d 100644
--- a/compiler/rustc_trait_selection/src/solve/search_graph.rs
+++ b/compiler/rustc_trait_selection/src/solve/search_graph.rs
@@ -1,8 +1,8 @@
-use crate::solve::FIXPOINT_STEP_LIMIT;
-
+#![allow(rustc::potential_query_instability)]
 use super::inspect;
 use super::inspect::ProofTreeBuilder;
 use super::SolverMode;
+use crate::solve::FIXPOINT_STEP_LIMIT;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::fx::FxHashSet;
 use rustc_index::Idx;
@@ -12,7 +12,8 @@ use rustc_middle::traits::solve::CacheData;
 use rustc_middle::traits::solve::{CanonicalInput, Certainty, EvaluationCache, QueryResult};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::Limit;
-use std::mem;
+use std::cmp::Ordering;
+use std::collections::BTreeMap;
 
 rustc_index::newtype_index! {
     #[orderable]
@@ -30,6 +31,12 @@ bitflags::bitflags! {
     }
 }
 
+impl Default for HasBeenUsed {
+    fn default() -> Self {
+        HasBeenUsed::empty()
+    }
+}
+
 #[derive(Debug)]
 struct StackEntry<'tcx> {
     input: CanonicalInput<'tcx>,
@@ -40,13 +47,14 @@ struct StackEntry<'tcx> {
     /// for the top of the stack and lazily updated for the rest.
     reached_depth: StackDepth,
 
-    /// Whether this entry is a non-root cycle participant.
+    /// If this entry is a non-root cycle participant, we store the depth of
+    /// all cycle participants and how that participant has been used.
     ///
     /// We must not move the result of non-root cycle participants to the
     /// global cache. See [SearchGraph::cycle_participants] for more details.
     /// We store the highest stack depth of a head of a cycle this goal is involved
     /// in. This necessary to soundly cache its provisional result.
-    non_root_cycle_participant: Option<StackDepth>,
+    heads: BTreeMap<StackDepth, HasBeenUsed>,
 
     encountered_overflow: bool,
 
@@ -57,7 +65,16 @@ struct StackEntry<'tcx> {
 }
 
 /// The provisional result for a goal which is not on the stack.
+#[derive(Debug)]
 struct DetachedEntry<'tcx> {
+    /// All stack entries upon which this `result` depends. This entry
+    /// is therefore outdated once the provisional result of any of these
+    /// goals changes or gets removed from the stack.
+    heads: BTreeMap<StackDepth, HasBeenUsed>,
+    result: QueryResult<'tcx>,
+}
+
+impl<'tcx> DetachedEntry<'tcx> {
     /// The head of the smallest non-trivial cycle involving this entry.
     ///
     /// Given the following rules, when proving `A` the head for
@@ -67,8 +84,9 @@ struct DetachedEntry<'tcx> {
     /// B :- C
     /// C :- A + B + C
     /// ```
-    head: StackDepth,
-    result: QueryResult<'tcx>,
+    fn head(&self) -> StackDepth {
+        *self.heads.last_key_value().unwrap().0
+    }
 }
 
 /// Stores the stack depth of a currently evaluated goal *and* already
@@ -83,7 +101,7 @@ struct DetachedEntry<'tcx> {
 ///
 /// The provisional cache can theoretically result in changes to the observable behavior,
 /// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs.
-#[derive(Default)]
+#[derive(Debug, Default)]
 struct ProvisionalCacheEntry<'tcx> {
     stack_depth: Option<StackDepth>,
     with_inductive_stack: Option<DetachedEntry<'tcx>>,
@@ -98,13 +116,89 @@ impl<'tcx> ProvisionalCacheEntry<'tcx> {
     }
 }
 
-pub(super) struct SearchGraph<'tcx> {
-    mode: SolverMode,
-    /// The stack of goals currently being computed.
+#[derive(Debug)]
+struct ReuseableDetachedEntry<'tcx> {
+    /// See the comment on `DetachedEntry::heads`. This entry is only
+    /// valid as long as all heads are on the stack and their provisional
+    /// results have not changed.
+    heads: BTreeMap<StackDepth, HasBeenUsed>,
+    /// The provisional results of all cycle heads used
+    /// when computing this goal.
+    provisional_results: Vec<QueryResult<'tcx>>,
+    /// Whether the path from the `head` of the smallest
+    /// cycle to this goal was coinductive.
+    is_coinductive: bool,
+
+    result: QueryResult<'tcx>,
+}
+
+impl<'tcx> ReuseableDetachedEntry<'tcx> {
+    /// We only want to reuse a provisional cache entry if the
+    /// provisional results of all used cycle heads have not changed
+    /// since computing this result.
+    fn is_applicable(
+        &self,
+        tcx: TyCtxt<'tcx>,
+        stack: &IndexVec<StackDepth, StackEntry<'tcx>>,
+    ) -> bool {
+        itertools::zip_eq(&self.heads, &self.provisional_results).all(|x| {
+            let ((&stack_depth, &usage_kind), &result) = x;
+            let actual = if let Some(result) = stack[stack_depth].provisional_result {
+                result
+            } else if usage_kind == HasBeenUsed::COINDUCTIVE_CYCLE {
+                response_no_constraints(tcx, stack[stack_depth].input, Certainty::Yes)
+            } else if usage_kind == HasBeenUsed::INDUCTIVE_CYCLE {
+                response_no_constraints(tcx, stack[stack_depth].input, Certainty::overflow(false))
+            } else {
+                unreachable!(); // for now
+            };
+
+            actual == result
+        })
+    }
+}
+
+/// When rerunning a goal as we haven't yet reached a fixpoint,
+/// try to reuse as much from the previous iteration as possible to
+/// avoid exponential blowup.
+#[derive(Debug)]
+struct FixpointInstantiationCacheEntry<'tcx> {
+    expected_stack: IndexVec<StackDepth, CanonicalInput<'tcx>>,
+    heads: BTreeMap<StackDepth, HasBeenUsed>,
+    provisional_result: QueryResult<'tcx>,
+    detached_cache_entries: FxHashMap<CanonicalInput<'tcx>, ReuseableDetachedEntry<'tcx>>,
+}
+
+impl<'tcx> FixpointInstantiationCacheEntry<'tcx> {
+    /// When reusing a provisional result we have to make sure the
+    /// result is actually applicable for the given goal. We check this
+    /// by only adding a provisional result if the stack matches the
+    /// previous iteration.
     ///
-    /// An element is *deeper* in the stack if its index is *lower*.
-    stack: IndexVec<StackDepth, StackEntry<'tcx>>,
-    provisional_cache: FxHashMap<CanonicalInput<'tcx>, ProvisionalCacheEntry<'tcx>>,
+    /// This may still impact behavior if there is incompleteness,
+    /// but given that we only cache the root of cycles we can just
+    /// ignore this.
+    fn is_applicable(&self, stack: &IndexVec<StackDepth, StackEntry<'tcx>>) -> bool {
+        self.expected_stack.len() == stack.len()
+            && itertools::zip_eq(&self.expected_stack, stack.iter().map(|e| &e.input))
+                .all(|(l, r)| l == r)
+    }
+}
+
+// FIXME(@lcnr): We may want to move `StackEntry::heads` and
+// `StackEntry::has_been_used` into a separate list in `CycleData`.
+#[derive(Debug)]
+struct CycleData<'tcx> {
+    /// The lowest stack depth of all participants. The root is the only cycle
+    /// participant which will get moved to the global cache.
+    root: StackDepth,
+
+    /// The provisional results for all nested cycle participants we've already computed.
+    /// The next time we evaluate these nested goals we use that result in the first
+    /// iteration.
+    fixpoint_instantiation_cache:
+        FxHashMap<CanonicalInput<'tcx>, Vec<FixpointInstantiationCacheEntry<'tcx>>>,
+
     /// We put only the root goal of a coinductive cycle into the global cache.
     ///
     /// If we were to use that result when later trying to prove another cycle
@@ -115,13 +209,176 @@ pub(super) struct SearchGraph<'tcx> {
     cycle_participants: FxHashSet<CanonicalInput<'tcx>>,
 }
 
+impl<'tcx> CycleData<'tcx> {
+    fn new(root: StackDepth) -> CycleData<'tcx> {
+        CycleData {
+            root,
+            fixpoint_instantiation_cache: Default::default(),
+            cycle_participants: Default::default(),
+        }
+    }
+
+    /// When encountering a solver cycle, the result of the current goal
+    /// depends on goals lower on the stack.
+    ///
+    /// We have to therefore be careful when caching goals. Only the final result
+    /// of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
+    /// is moved to the global cache while all others are stored in a provisional cache.
+    ///
+    /// We update both the head of this cycle to rerun its evaluation until
+    /// we reach a fixpoint and all other cycle participants to make sure that
+    /// their result does not get moved to the global cache.
+    //
+    // TODO
+    fn tag_cycle_participants(
+        &mut self,
+        stack: &mut IndexVec<StackDepth, StackEntry<'tcx>>,
+        heads: &BTreeMap<StackDepth, HasBeenUsed>,
+    ) {
+        let head = *heads.last_key_value().unwrap().0;
+        self.root = self.root.min(head);
+
+        for (&head, &usage_kind) in heads {
+            stack[head].has_been_used |= usage_kind;
+        }
+
+        for stack_entry in stack.iter_mut().skip(head.index() + 1) {
+            for (&head, &usage_kind) in heads {
+                debug_assert_ne!(usage_kind, HasBeenUsed::empty());
+                *stack_entry.heads.entry(head).or_default() |= usage_kind;
+            }
+
+            self.cycle_participants.insert(stack_entry.input);
+        }
+    }
+
+    /// Add the provisional result for a given goal to the storage.
+    fn add_fixpoint_instantiation_cache_entry(
+        &mut self,
+        stack: &IndexVec<StackDepth, StackEntry<'tcx>>,
+        entry: &StackEntry<'tcx>,
+        provisional_result: QueryResult<'tcx>,
+        detached_cache_entries: FxHashMap<CanonicalInput<'tcx>, ReuseableDetachedEntry<'tcx>>,
+    ) {
+        // Store the provisional result for this goal to enable
+        // us to reuse it in case of a fixpoint.
+        let cache_entry = FixpointInstantiationCacheEntry {
+            expected_stack: stack.iter().map(|e| e.input).collect(),
+            heads: entry.heads.clone(),
+            provisional_result,
+            detached_cache_entries,
+        };
+        let cache_entries = self.fixpoint_instantiation_cache.entry(entry.input).or_default();
+        if cfg!(debug_assertions) {
+            // We should only use this function after computing a goal, at
+            // the start of which we remove its previous fixpoint instantiation
+            // cache entry.
+            if let Some(r) = cache_entries.iter().find(|r| r.is_applicable(stack)) {
+                bug!("existing fixpoint instantiation cache entry: {r:?}");
+            }
+        }
+
+        cache_entries.push(cache_entry);
+    }
+
+    /// Tries to reuse results from the previous fixpoint iteration if they
+    /// are still applicable.
+    ///
+    /// We are able to use this to both initialize the provisional result
+    /// and add the results of nested goals to the provisional cache if they
+    /// only depend on stack entries whose provisional result has not changed.
+    fn try_apply_fixpoint_instantiation_cache_entry(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        stack: &mut IndexVec<StackDepth, StackEntry<'tcx>>,
+        provisional_cache: &mut FxHashMap<CanonicalInput<'tcx>, ProvisionalCacheEntry<'tcx>>,
+        available_depth: Limit,
+        depth: StackDepth,
+        input: CanonicalInput<'tcx>,
+    ) -> bool {
+        let Some(entries) = self.fixpoint_instantiation_cache.get_mut(&input) else {
+            return false;
+        };
+        let Some(idx) = entries.iter().position(|r| r.is_applicable(stack)) else {
+            return false;
+        };
+
+        let FixpointInstantiationCacheEntry {
+            expected_stack: _,
+            heads,
+            provisional_result,
+            detached_cache_entries,
+        } = entries.remove(idx);
+
+        assert_ne!(*heads.last_key_value().unwrap().0, depth);
+        let stack_entry = StackEntry {
+            input,
+            available_depth,
+            reached_depth: depth,
+            heads: Default::default(),
+            encountered_overflow: false,
+            has_been_used: HasBeenUsed::empty(),
+            provisional_result: Some(provisional_result),
+        };
+        assert_eq!(stack.push(stack_entry), depth);
+
+        for (input, provisional_cache_entry) in detached_cache_entries {
+            if provisional_cache_entry.is_applicable(tcx, stack) {
+                let cache_entry = provisional_cache.entry(input).or_default();
+                let heads = provisional_cache_entry.heads;
+                for (&head, &usage_kind) in heads.iter() {
+                    assert_ne!(usage_kind, HasBeenUsed::empty());
+                    stack[head].has_been_used |= usage_kind;
+                }
+
+                if cfg!(debug_assertions) {
+                    let (&head, &usage_kind) = heads.last_key_value().unwrap();
+                    assert_eq!(head, depth);
+                    assert_ne!(usage_kind, HasBeenUsed::empty());
+                    assert!(stack[head].has_been_used.contains(usage_kind));
+                }
+
+                if provisional_cache_entry.is_coinductive {
+                    cache_entry.with_coinductive_stack =
+                        Some(DetachedEntry { heads, result: provisional_cache_entry.result });
+                } else {
+                    cache_entry.with_inductive_stack =
+                        Some(DetachedEntry { heads, result: provisional_cache_entry.result });
+                }
+            }
+        }
+
+        true
+    }
+}
+
+#[derive(Debug)]
+pub(super) struct SearchGraph<'tcx> {
+    mode: SolverMode,
+    /// The stack of goals currently being computed.
+    ///
+    /// An element is *deeper* in the stack if its index is *lower*.
+    stack: IndexVec<StackDepth, StackEntry<'tcx>>,
+
+    /// In case we're currently in a solver cycle, we track a lot of
+    /// additional data.
+    cycle_data: Option<CycleData<'tcx>>,
+
+    /// A cache for the result of nested goals which depend on goals currently on the
+    /// stack. We remove cached results once we pop any goal used while computing it.
+    ///
+    /// This is not part of `cycle_data` as it contains all stack entries even while we're
+    /// not yet in a cycle.
+    provisional_cache: FxHashMap<CanonicalInput<'tcx>, ProvisionalCacheEntry<'tcx>>,
+}
+
 impl<'tcx> SearchGraph<'tcx> {
     pub(super) fn new(mode: SolverMode) -> SearchGraph<'tcx> {
         Self {
             mode,
             stack: Default::default(),
+            cycle_data: None,
             provisional_cache: Default::default(),
-            cycle_participants: Default::default(),
         }
     }
 
@@ -155,13 +412,8 @@ impl<'tcx> SearchGraph<'tcx> {
     }
 
     pub(super) fn is_empty(&self) -> bool {
-        if self.stack.is_empty() {
-            debug_assert!(self.provisional_cache.is_empty());
-            debug_assert!(self.cycle_participants.is_empty());
-            true
-        } else {
-            false
-        }
+        self.check_invariants();
+        self.stack.is_empty()
     }
 
     /// Returns the remaining depth allowed for nested goals.
@@ -199,38 +451,26 @@ impl<'tcx> SearchGraph<'tcx> {
             .all(|entry| entry.input.value.goal.predicate.is_coinductive(tcx))
     }
 
-    // When encountering a solver cycle, the result of the current goal
-    // depends on goals lower on the stack.
-    //
-    // We have to therefore be careful when caching goals. Only the final result
-    // of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
-    // is moved to the global cache while all others are stored in a provisional cache.
-    //
-    // We update both the head of this cycle to rerun its evaluation until
-    // we reach a fixpoint and all other cycle participants to make sure that
-    // their result does not get moved to the global cache.
-    fn tag_cycle_participants(
-        stack: &mut IndexVec<StackDepth, StackEntry<'tcx>>,
-        cycle_participants: &mut FxHashSet<CanonicalInput<'tcx>>,
-        usage_kind: HasBeenUsed,
-        head: StackDepth,
-    ) {
-        stack[head].has_been_used |= usage_kind;
-        debug_assert!(!stack[head].has_been_used.is_empty());
-        for entry in &mut stack.raw[head.index() + 1..] {
-            entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head));
-            cycle_participants.insert(entry.input);
-        }
-    }
-
     fn clear_dependent_provisional_results(
         provisional_cache: &mut FxHashMap<CanonicalInput<'tcx>, ProvisionalCacheEntry<'tcx>>,
         head: StackDepth,
+        mut f: impl FnMut(
+            CanonicalInput<'tcx>,
+            Option<DetachedEntry<'tcx>>,
+            Option<DetachedEntry<'tcx>>,
+        ),
     ) {
+        let condition = |p: &mut DetachedEntry<'tcx>| match p.head().cmp(&head) {
+            Ordering::Less => false,
+            Ordering::Equal => true,
+            Ordering::Greater => bug!("provisional entry for popped value"),
+        };
+
         #[allow(rustc::potential_query_instability)]
-        provisional_cache.retain(|_, entry| {
-            entry.with_coinductive_stack.take_if(|p| p.head == head);
-            entry.with_inductive_stack.take_if(|p| p.head == head);
+        provisional_cache.retain(|input, entry| {
+            let coinductive = entry.with_coinductive_stack.take_if(condition);
+            let inductive = entry.with_inductive_stack.take_if(condition);
+            f(*input, coinductive, inductive);
             !entry.is_empty()
         });
     }
@@ -246,6 +486,8 @@ impl<'tcx> SearchGraph<'tcx> {
         inspect: &mut ProofTreeBuilder<'tcx>,
         mut prove_goal: impl FnMut(&mut Self, &mut ProofTreeBuilder<'tcx>) -> QueryResult<'tcx>,
     ) -> QueryResult<'tcx> {
+        self.check_invariants();
+
         // Check for overflow.
         let Some(available_depth) = Self::allowed_depth_for_nested(tcx, &self.stack) else {
             if let Some(last) = self.stack.raw.last_mut() {
@@ -253,7 +495,7 @@ impl<'tcx> SearchGraph<'tcx> {
             }
 
             inspect.goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::Overflow);
-            return Self::response_no_constraints(tcx, input, Certainty::overflow(true));
+            return response_no_constraints(tcx, input, Certainty::overflow(true));
         };
 
         if let Some(result) = self.lookup_global_cache(tcx, input, available_depth, inspect) {
@@ -268,12 +510,12 @@ impl<'tcx> SearchGraph<'tcx> {
         if let Some(entry) = cache_entry
             .with_coinductive_stack
             .as_ref()
-            .filter(|p| Self::stack_coinductive_from(tcx, &self.stack, p.head))
+            .filter(|p| Self::stack_coinductive_from(tcx, &self.stack, p.head()))
             .or_else(|| {
                 cache_entry
                     .with_inductive_stack
                     .as_ref()
-                    .filter(|p| !Self::stack_coinductive_from(tcx, &self.stack, p.head))
+                    .filter(|p| !Self::stack_coinductive_from(tcx, &self.stack, p.head()))
             })
         {
             // We have a nested goal which is already in the provisional cache, use
@@ -281,12 +523,7 @@ impl<'tcx> SearchGraph<'tcx> {
             // already set correctly while computing the cache entry.
             inspect
                 .goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::ProvisionalCacheHit);
-            Self::tag_cycle_participants(
-                &mut self.stack,
-                &mut self.cycle_participants,
-                HasBeenUsed::empty(),
-                entry.head,
-            );
+            self.cycle_data.as_mut().unwrap().tag_cycle_participants(&mut self.stack, &entry.heads);
             return entry.result;
         } else if let Some(stack_depth) = cache_entry.stack_depth {
             debug!("encountered cycle with depth {stack_depth:?}");
@@ -303,36 +540,49 @@ impl<'tcx> SearchGraph<'tcx> {
             } else {
                 HasBeenUsed::INDUCTIVE_CYCLE
             };
-            Self::tag_cycle_participants(
-                &mut self.stack,
-                &mut self.cycle_participants,
-                usage_kind,
-                stack_depth,
-            );
+
+            let cycle_data = self.cycle_data.get_or_insert_with(|| CycleData::new(stack_depth));
+            let heads = [(stack_depth, usage_kind)].into_iter().collect(); // TODO
+            cycle_data.tag_cycle_participants(&mut self.stack, &heads);
 
             // Return the provisional result or, if we're in the first iteration,
             // start with no constraints.
             return if let Some(result) = self.stack[stack_depth].provisional_result {
                 result
             } else if is_coinductive_cycle {
-                Self::response_no_constraints(tcx, input, Certainty::Yes)
+                response_no_constraints(tcx, input, Certainty::Yes)
             } else {
-                Self::response_no_constraints(tcx, input, Certainty::overflow(false))
+                response_no_constraints(tcx, input, Certainty::overflow(false))
             };
         } else {
             // No entry, we push this goal on the stack and try to prove it.
             let depth = self.stack.next_index();
-            let entry = StackEntry {
-                input,
-                available_depth,
-                reached_depth: depth,
-                non_root_cycle_participant: None,
-                encountered_overflow: false,
-                has_been_used: HasBeenUsed::empty(),
-                provisional_result: None,
-            };
-            assert_eq!(self.stack.push(entry), depth);
             cache_entry.stack_depth = Some(depth);
+            let from_cache = if let Some(cycle_data) = &mut self.cycle_data {
+                cycle_data.try_apply_fixpoint_instantiation_cache_entry(
+                    tcx,
+                    &mut self.stack,
+                    &mut self.provisional_cache,
+                    available_depth,
+                    depth,
+                    input,
+                )
+            } else {
+                false
+            };
+
+            if !from_cache {
+                let stack_entry = StackEntry {
+                    input,
+                    available_depth,
+                    reached_depth: depth,
+                    heads: Default::default(),
+                    encountered_overflow: false,
+                    has_been_used: HasBeenUsed::empty(),
+                    provisional_result: None,
+                };
+                assert_eq!(self.stack.push(stack_entry), depth);
+            }
         }
 
         // This is for global caching, so we properly track query dependencies.
@@ -353,7 +603,7 @@ impl<'tcx> SearchGraph<'tcx> {
                 debug!("canonical cycle overflow");
                 let current_entry = self.pop_stack();
                 debug_assert!(current_entry.has_been_used.is_empty());
-                let result = Self::response_no_constraints(tcx, input, Certainty::overflow(false));
+                let result = response_no_constraints(tcx, input, Certainty::overflow(false));
                 (current_entry, result)
             });
 
@@ -362,20 +612,10 @@ impl<'tcx> SearchGraph<'tcx> {
         // We're now done with this goal. In case this goal is involved in a larger cycle
         // do not remove it from the provisional cache and update its provisional result.
         // We only add the root of cycles to the global cache.
-        if let Some(head) = final_entry.non_root_cycle_participant {
-            let coinductive_stack = Self::stack_coinductive_from(tcx, &self.stack, head);
-
-            let entry = self.provisional_cache.get_mut(&input).unwrap();
-            entry.stack_depth = None;
-            if coinductive_stack {
-                entry.with_coinductive_stack = Some(DetachedEntry { head, result });
-            } else {
-                entry.with_inductive_stack = Some(DetachedEntry { head, result });
-            }
-        } else {
-            self.provisional_cache.remove(&input);
-            let reached_depth = final_entry.reached_depth.as_usize() - self.stack.len();
-            let cycle_participants = mem::take(&mut self.cycle_participants);
+        if final_entry.heads.is_empty() {
+            let depth = self.stack.next_index();
+            let provisional_cache_entry = self.provisional_cache.remove(&input);
+            assert_eq!(provisional_cache_entry.unwrap().stack_depth, Some(depth));
             // When encountering a cycle, both inductive and coinductive, we only
             // move the root into the global cache. We also store all other cycle
             // participants involved.
@@ -384,6 +624,14 @@ impl<'tcx> SearchGraph<'tcx> {
             // participant is on the stack. This is necessary to prevent unstable
             // results. See the comment of `SearchGraph::cycle_participants` for
             // more details.
+            let cycle_participants =
+                if let Some(cycle_data) = self.cycle_data.take_if(|data| data.root == depth) {
+                    cycle_data.cycle_participants
+                } else {
+                    Default::default()
+                };
+
+            let reached_depth = final_entry.reached_depth.as_usize() - self.stack.len();
             self.global_cache(tcx).insert(
                 tcx,
                 input,
@@ -394,8 +642,23 @@ impl<'tcx> SearchGraph<'tcx> {
                 dep_node,
                 result,
             )
+        } else {
+            let heads = final_entry.heads;
+            let head = *heads.last_key_value().unwrap().0;
+            debug_assert!(head < self.stack.next_index());
+            debug_assert_ne!(self.stack[head].has_been_used, HasBeenUsed::empty());
+
+            let coinductive_stack = Self::stack_coinductive_from(tcx, &self.stack, head);
+            let entry = self.provisional_cache.get_mut(&input).unwrap();
+            entry.stack_depth = None;
+            if coinductive_stack {
+                entry.with_coinductive_stack = Some(DetachedEntry { heads, result });
+            } else {
+                entry.with_inductive_stack = Some(DetachedEntry { heads, result });
+            }
         }
 
+        self.check_invariants();
         result
     }
 
@@ -460,12 +723,22 @@ impl<'tcx> SearchGraph<'tcx> {
     where
         F: FnMut(&mut Self, &mut ProofTreeBuilder<'tcx>) -> QueryResult<'tcx>,
     {
+        self.check_invariants();
         let result = prove_goal(self, inspect);
         let stack_entry = self.pop_stack();
         debug_assert_eq!(stack_entry.input, input);
 
         // If the current goal is not the root of a cycle, we are done.
         if stack_entry.has_been_used.is_empty() {
+            Self::clear_dependent_provisional_results(
+                &mut self.provisional_cache,
+                self.stack.next_index(),
+                |_, co, ind| {
+                    if co.is_some() || ind.is_some() {
+                        bug!()
+                    }
+                },
+            );
             return StepResult::Done(stack_entry, result);
         }
 
@@ -476,13 +749,6 @@ impl<'tcx> SearchGraph<'tcx> {
         // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
         // for an example.
 
-        // Start by clearing all provisional cache entries which depend on this
-        // the current goal.
-        Self::clear_dependent_provisional_results(
-            &mut self.provisional_cache,
-            self.stack.next_index(),
-        );
-
         // Check whether we reached a fixpoint, either because the final result
         // is equal to the provisional result of the previous iteration, or because
         // this was only the root of either coinductive or inductive cycles, and the
@@ -490,32 +756,152 @@ impl<'tcx> SearchGraph<'tcx> {
         let reached_fixpoint = if let Some(r) = stack_entry.provisional_result {
             r == result
         } else if stack_entry.has_been_used == HasBeenUsed::COINDUCTIVE_CYCLE {
-            Self::response_no_constraints(tcx, input, Certainty::Yes) == result
+            response_no_constraints(tcx, input, Certainty::Yes) == result
         } else if stack_entry.has_been_used == HasBeenUsed::INDUCTIVE_CYCLE {
-            Self::response_no_constraints(tcx, input, Certainty::overflow(false)) == result
+            response_no_constraints(tcx, input, Certainty::overflow(false)) == result
         } else {
             false
         };
 
-        // If we did not reach a fixpoint, update the provisional result and reevaluate.
         if reached_fixpoint {
+            // If we're done, move this goal and all detached cache entries which depend on
+            // it into the fixpoint instantiation cache.
+            let cycle_data = self.cycle_data.as_mut().unwrap();
+            let mut detached_cache_entries = FxHashMap::default();
+            Self::clear_dependent_provisional_results(
+                &mut self.provisional_cache,
+                self.stack.next_index(),
+                |input, coinductive, inductive| {
+                    let (entry, is_coinductive) = match (coinductive, inductive) {
+                        (Some(entry), None) => (entry, true),
+                        (None, Some(entry)) => (entry, false),
+                        _ => return,
+                    };
+
+                    let mut provisional_results = Vec::new();
+                    for (&head, &usage_kind) in &entry.heads {
+                        // We compute this after already popping the current goal from the stack.
+                        let elem = self.stack.get(head).unwrap_or(&stack_entry);
+                        provisional_results.push(if let Some(result) = elem.provisional_result {
+                            result
+                        } else if usage_kind == HasBeenUsed::COINDUCTIVE_CYCLE {
+                            response_no_constraints(tcx, elem.input, Certainty::Yes)
+                        } else if usage_kind == HasBeenUsed::INDUCTIVE_CYCLE {
+                            response_no_constraints(tcx, elem.input, Certainty::overflow(false))
+                        } else {
+                            return; // just bail and ignore that result in this case for now
+                        });
+                    }
+                    let entry = ReuseableDetachedEntry {
+                        heads: entry.heads,
+                        provisional_results,
+                        is_coinductive,
+                        result: entry.result,
+                    };
+                    assert!(detached_cache_entries.insert(input, entry).is_none());
+                },
+            );
+            cycle_data.add_fixpoint_instantiation_cache_entry(
+                &self.stack,
+                &stack_entry,
+                result,
+                detached_cache_entries,
+            );
             StepResult::Done(stack_entry, result)
         } else {
+            // If not, recompute after throwing out all provisional cache
+            // entries which depend on the current goal. We then update
+            // the provisional result and recompute.
+            Self::clear_dependent_provisional_results(
+                &mut self.provisional_cache,
+                self.stack.next_index(),
+                |_, _, _| {},
+            );
             let depth = self.stack.push(StackEntry {
                 has_been_used: HasBeenUsed::empty(),
                 provisional_result: Some(result),
+                heads: Default::default(),
                 ..stack_entry
             });
             debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
             StepResult::HasChanged
         }
     }
+}
 
-    fn response_no_constraints(
-        tcx: TyCtxt<'tcx>,
-        goal: CanonicalInput<'tcx>,
-        certainty: Certainty,
-    ) -> QueryResult<'tcx> {
-        Ok(super::response_no_constraints_raw(tcx, goal.max_universe, goal.variables, certainty))
+fn response_no_constraints<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    goal: CanonicalInput<'tcx>,
+    certainty: Certainty,
+) -> QueryResult<'tcx> {
+    Ok(super::response_no_constraints_raw(tcx, goal.max_universe, goal.variables, certainty))
+}
+
+impl<'tcx> SearchGraph<'tcx> {
+    fn check_invariants(&self) {
+        if !cfg!(debug_assertions) {
+            return;
+        }
+
+        let SearchGraph { mode: _, stack, cycle_data, provisional_cache } = self;
+        if stack.is_empty() {
+            debug_assert!(cycle_data.is_none());
+            debug_assert!(provisional_cache.is_empty());
+        }
+
+        for (depth, entry) in stack.iter_enumerated() {
+            let StackEntry {
+                input: _,
+                available_depth: _,
+                reached_depth: _,
+                heads,
+                encountered_overflow: _,
+                has_been_used: _,
+                provisional_result,
+            } = entry;
+            let cache_entry = provisional_cache.get(&entry.input).unwrap();
+            assert_eq!(cache_entry.stack_depth, Some(depth));
+            for (&head, &usage_kind) in heads {
+                assert!(head < depth);
+                assert!(cycle_data.is_some());
+                assert_ne!(usage_kind, HasBeenUsed::empty());
+                assert!(stack[head].has_been_used.contains(usage_kind));
+            }
+
+            if provisional_result.is_some() {
+                assert!(cycle_data.is_some());
+            }
+        }
+
+        for (&input, entry) in &self.provisional_cache {
+            let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } =
+                entry;
+            assert!(
+                stack_depth.is_some()
+                    || with_coinductive_stack.is_some()
+                    || with_inductive_stack.is_some()
+            );
+
+            if let &Some(stack_depth) = stack_depth {
+                assert_eq!(stack[stack_depth].input, input);
+            }
+
+            let check_detached = |detached_entry: &DetachedEntry<'tcx>| {
+                assert!(cycle_data.is_some());
+                let DetachedEntry { heads, result: _ } = detached_entry;
+                for (&head, &usage_kind) in heads {
+                    assert_ne!(usage_kind, HasBeenUsed::empty());
+                    assert!(stack[head].has_been_used.contains(usage_kind));
+                }
+            };
+
+            if let Some(with_coinductive_stack) = with_coinductive_stack {
+                check_detached(with_coinductive_stack);
+            }
+
+            if let Some(with_inductive_stack) = with_inductive_stack {
+                check_detached(with_inductive_stack);
+            }
+        }
     }
 }

From 7afe01744699e58cdc566dbf8fc424752bb11f74 Mon Sep 17 00:00:00 2001
From: lcnr <rust@lcnr.de>
Date: Wed, 15 May 2024 21:16:06 +0000
Subject: [PATCH 4/6] change test to pass

---
 .../cycles/coinduction/incompleteness-unstable-result.rs   | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs
index 7eea81ce03c66..430e10f2e38ea 100644
--- a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs
+++ b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs
@@ -1,6 +1,12 @@
+//@ revisions: with without
 //@ compile-flags: -Znext-solver
+//@ check-pass
 #![feature(rustc_attrs)]
 
+// While this originally tested for unstable global caching, this is no
+// longer the case due to changes in the provisional cache. This is still
+// useful to catch provisional cache bugs however.
+
 // This test is incredibly subtle. At its core the goal is to get a coinductive cycle,
 // which, depending on its root goal, either holds or errors. We achieve this by getting
 // incomplete inference via a `ParamEnv` candidate in the `A<T>` impl and required
@@ -56,6 +62,7 @@ where
     X: IncompleteGuidance<u32, i8>,
     X: IncompleteGuidance<u32, i16>,
 {
+    #[cfg(with)]
     impls_trait::<B<X>, _, _, _>(); // entering the cycle from `B` works
 
     // entering the cycle from `A` fails, but would work if we were to use the cache

From 6a10b4a0f28b98fb590322bb34cee10387c6bcee Mon Sep 17 00:00:00 2001
From: lcnr <rust@lcnr.de>
Date: Sat, 18 May 2024 21:10:13 +0000
Subject: [PATCH 5/6] check additional invariants, add test

---
 .../src/solve/search_graph.rs                 | 31 ++++++++++++++++---
 .../incompleteness-unstable-result.rs         |  1 -
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/compiler/rustc_trait_selection/src/solve/search_graph.rs b/compiler/rustc_trait_selection/src/solve/search_graph.rs
index 8d111f00a5e0d..388067e0f1b65 100644
--- a/compiler/rustc_trait_selection/src/solve/search_graph.rs
+++ b/compiler/rustc_trait_selection/src/solve/search_graph.rs
@@ -733,10 +733,9 @@ impl<'tcx> SearchGraph<'tcx> {
             Self::clear_dependent_provisional_results(
                 &mut self.provisional_cache,
                 self.stack.next_index(),
-                |_, co, ind| {
-                    if co.is_some() || ind.is_some() {
-                        bug!()
-                    }
+                |_, co, ind| match (co, ind) {
+                    (Some(_), _) | (_, Some(_)) => bug!(),
+                    _ => {}
                 },
             );
             return StepResult::Done(stack_entry, result);
@@ -873,6 +872,30 @@ impl<'tcx> SearchGraph<'tcx> {
             }
         }
 
+        if let Some(CycleData { root, ref fixpoint_instantiation_cache, ref cycle_participants }) =
+            *cycle_data
+        {
+            for (depth, stack_entry) in stack.iter_enumerated() {
+                let is_cycle_participant = cycle_participants.contains(&stack_entry.input);
+                assert_eq!(depth >= root, is_cycle_participant);
+            }
+
+            for (input, entries) in fixpoint_instantiation_cache {
+                for entry in entries {
+                    let FixpointInstantiationCacheEntry {
+                        expected_stack,
+                        heads,
+                        provisional_result: _,
+                        detached_cache_entries: _,
+                    } = entry;
+                    assert!(!heads.is_empty());
+                    for i in (0..root.as_usize()).map(StackDepth::from_usize) {
+                        assert_eq!(stack[i].input, expected_stack[i]);
+                    }
+                }
+            }
+        }
+
         for (&input, entry) in &self.provisional_cache {
             let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } =
                 entry;
diff --git a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs
index 430e10f2e38ea..aaf71d3ddadd1 100644
--- a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs
+++ b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs
@@ -68,7 +68,6 @@ where
     // entering the cycle from `A` fails, but would work if we were to use the cache
     // result of `B<X>`.
     impls_trait::<A<X>, _, _, _>();
-    //~^ ERROR the trait bound `A<X>: Trait<_, _, _>` is not satisfied
 }
 
 fn main() {

From ad07cca11bbbb4dab655cea835cf2b8f61f46fb7 Mon Sep 17 00:00:00 2001
From: lcnr <rust@lcnr.de>
Date: Sat, 18 May 2024 21:25:42 +0000
Subject: [PATCH 6/6] fix assert, remove TODO

---
 compiler/rustc_trait_selection/src/solve/search_graph.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/rustc_trait_selection/src/solve/search_graph.rs b/compiler/rustc_trait_selection/src/solve/search_graph.rs
index 388067e0f1b65..7fcfb71e890b6 100644
--- a/compiler/rustc_trait_selection/src/solve/search_graph.rs
+++ b/compiler/rustc_trait_selection/src/solve/search_graph.rs
@@ -542,7 +542,7 @@ impl<'tcx> SearchGraph<'tcx> {
             };
 
             let cycle_data = self.cycle_data.get_or_insert_with(|| CycleData::new(stack_depth));
-            let heads = [(stack_depth, usage_kind)].into_iter().collect(); // TODO
+            let heads = [(stack_depth, usage_kind)].into_iter().collect();
             cycle_data.tag_cycle_participants(&mut self.stack, &heads);
 
             // Return the provisional result or, if we're in the first iteration,