From 209cb2aa2e0d67bf89a130b070f7116178e9ddb4 Mon Sep 17 00:00:00 2001
From: Valentine Wallace <vwallace@protonmail.com>
Date: Thu, 3 Apr 2025 17:33:08 -0400
Subject: [PATCH 1/2] Fix spurious MPP pathfinding failure

This bug was recently surfaced to us by a user who wrote a test where the
sender is attempting to route an MPP payment split across the two channels that
it has with its LSP, where the LSP has a single large channel to the recipient.

Previously this led to a pathfinding failure because our router was not sending
the maximum possible value over the first MPP path it found due to
overestimating the fees needed to cover the following hops. Because the path
that had just been found was not maxxed out, our router assumed that we had
already found enough paths to cover the full payment amount and that we were
finding additional paths for the purpose of redundant path selection. This
caused the router to mark the recipient's only channel as exhausted, with the
intention of choosing more unique paths in future iterations. In reality, this
ended up with the recipient's only channel being disabled and subsequently
failing to find a route entirely.

Update the router to fully utilize the capacity of any paths it finds in this
situation, preventing the "redundant path selection" behavior from kicking in.
---
 lightning/src/blinded_path/payment.rs | 52 ++++++++++++++-------
 lightning/src/ln/payment_tests.rs     | 51 ++++++++++++++++++++
 lightning/src/routing/router.rs       | 67 +++++++++++++++++++++------
 3 files changed, 138 insertions(+), 32 deletions(-)
diff --git a/lightning/src/blinded_path/payment.rs b/lightning/src/blinded_path/payment.rs
index b0639dd57e5..f015886933a 100644
--- a/lightning/src/blinded_path/payment.rs
+++ b/lightning/src/blinded_path/payment.rs
@@ -31,6 +31,7 @@ use crate::routing::gossip::{NodeId, ReadOnlyNetworkGraph};
 use crate::sign::{EntropySource, NodeSigner, Recipient};
 use crate::types::features::BlindedHopFeatures;
 use crate::types::payment::PaymentSecret;
+use crate::types::routing::RoutingFees;
 use crate::util::ser::{
 	FixedLengthReader, HighZeroBytesDroppedBigSize, LengthReadableArgs, Readable, WithoutLength,
 	Writeable, Writer,
@@ -692,22 +693,17 @@ pub(crate) fn amt_to_forward_msat(
 	u64::try_from(amt_to_forward).ok()
 }
 
-pub(super) fn compute_payinfo(
-	intermediate_nodes: &[PaymentForwardNode], payee_tlvs: &UnauthenticatedReceiveTlvs,
-	payee_htlc_maximum_msat: u64, min_final_cltv_expiry_delta: u16,
-) -> Result<BlindedPayInfo, ()> {
+// Returns (aggregated_base_fee, aggregated_proportional_fee)
+pub(crate) fn compute_aggregated_base_prop_fee<I>(hops_fees: I) -> Result<(u64, u64), ()>
+where
+	I: DoubleEndedIterator<Item = RoutingFees>,
+{
 	let mut curr_base_fee: u64 = 0;
 	let mut curr_prop_mil: u64 = 0;
-	let mut cltv_expiry_delta: u16 = min_final_cltv_expiry_delta;
-	for tlvs in intermediate_nodes.iter().rev().map(|n| &n.tlvs) {
-		// In the future, we'll want to take the intersection of all supported features for the
-		// `BlindedPayInfo`, but there are no features in that context right now.
-		if tlvs.features.requires_unknown_bits_from(&BlindedHopFeatures::empty()) {
-			return Err(());
-		}
+	for fees in hops_fees.rev() {
+		let next_base_fee = fees.base_msat as u64;
+		let next_prop_mil = fees.proportional_millionths as u64;
 
-		let next_base_fee = tlvs.payment_relay.fee_base_msat as u64;
-		let next_prop_mil = tlvs.payment_relay.fee_proportional_millionths as u64;
 		// Use integer arithmetic to compute `ceil(a/b)` as `(a+b-1)/b`
 		// ((curr_base_fee * (1_000_000 + next_prop_mil)) / 1_000_000) + next_base_fee
 		curr_base_fee = curr_base_fee
@@ -724,14 +720,34 @@ pub(super) fn compute_payinfo(
 			.map(|f| f / 1_000_000)
 			.and_then(|f| f.checked_sub(1_000_000))
 			.ok_or(())?;
-
-		cltv_expiry_delta =
-			cltv_expiry_delta.checked_add(tlvs.payment_relay.cltv_expiry_delta).ok_or(())?;
 	}
 
+	Ok((curr_base_fee, curr_prop_mil))
+}
+
+pub(super) fn compute_payinfo(
+	intermediate_nodes: &[PaymentForwardNode], payee_tlvs: &UnauthenticatedReceiveTlvs,
+	payee_htlc_maximum_msat: u64, min_final_cltv_expiry_delta: u16,
+) -> Result<BlindedPayInfo, ()> {
+	let (aggregated_base_fee, aggregated_prop_fee) =
+		compute_aggregated_base_prop_fee(intermediate_nodes.iter().map(|node| RoutingFees {
+			base_msat: node.tlvs.payment_relay.fee_base_msat,
+			proportional_millionths: node.tlvs.payment_relay.fee_proportional_millionths,
+		}))?;
+
 	let mut htlc_minimum_msat: u64 = 1;
 	let mut htlc_maximum_msat: u64 = 21_000_000 * 100_000_000 * 1_000; // Total bitcoin supply
+	let mut cltv_expiry_delta: u16 = min_final_cltv_expiry_delta;
 	for node in intermediate_nodes.iter() {
+		// In the future, we'll want to take the intersection of all supported features for the
+		// `BlindedPayInfo`, but there are no features in that context right now.
+		if node.tlvs.features.requires_unknown_bits_from(&BlindedHopFeatures::empty()) {
+			return Err(());
+		}
+
+		cltv_expiry_delta =
+			cltv_expiry_delta.checked_add(node.tlvs.payment_relay.cltv_expiry_delta).ok_or(())?;
+
 		// The min htlc for an intermediate node is that node's min minus the fees charged by all of the
 		// following hops for forwarding that min, since that fee amount will automatically be included
 		// in the amount that this node receives and contribute towards reaching its min.
@@ -754,8 +770,8 @@ pub(super) fn compute_payinfo(
 		return Err(());
 	}
 	Ok(BlindedPayInfo {
-		fee_base_msat: u32::try_from(curr_base_fee).map_err(|_| ())?,
-		fee_proportional_millionths: u32::try_from(curr_prop_mil).map_err(|_| ())?,
+		fee_base_msat: u32::try_from(aggregated_base_fee).map_err(|_| ())?,
+		fee_proportional_millionths: u32::try_from(aggregated_prop_fee).map_err(|_| ())?,
 		cltv_expiry_delta,
 		htlc_minimum_msat,
 		htlc_maximum_msat,
diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs
index 84a90c50ecd..12e9fdb4afb 100644
--- a/lightning/src/ln/payment_tests.rs
+++ b/lightning/src/ln/payment_tests.rs
@@ -4515,3 +4515,54 @@ fn pay_route_without_params() {
 		ClaimAlongRouteArgs::new(&nodes[0], &[&[&nodes[1]]], payment_preimage)
 	);
 }
+
+#[test]
+fn max_out_mpp_path() {
+	// In this setup, the sender is attempting to route an MPP payment split across the two channels
+	// that it has with its LSP, where the LSP has a single large channel to the recipient.
+	//
+	// Previously a user ran into a pathfinding failure here because our router was not sending the
+	// maximum possible value over the first MPP path it found due to overestimating the fees needed
+	// to cover the following hops. Because the path that had just been found was not maxxed out, our
+	// router assumed that we had already found enough paths to cover the full payment amount and that
+	// we were finding additional paths for the purpose of redundant path selection. This caused the
+	// router to mark the recipient's only channel as exhausted, with the intention of choosing more
+	// unique paths in future iterations. In reality, this ended up with the recipient's only channel
+	// being disabled and subsequently failing to find a route entirely.
+	//
+	// The router has since been updated to fully utilize the capacity of any paths it finds in this
+	// situation, preventing the "redundant path selection" behavior from kicking in.
+
+	let mut user_cfg = test_default_channel_config();
+	user_cfg.channel_config.forwarding_fee_base_msat = 0;
+	user_cfg.channel_handshake_config.max_inbound_htlc_value_in_flight_percent_of_channel = 100;
+	let mut lsp_cfg = test_default_channel_config();
+	lsp_cfg.channel_config.forwarding_fee_base_msat = 0;
+	lsp_cfg.channel_config.forwarding_fee_proportional_millionths = 3000;
+	lsp_cfg.channel_handshake_config.max_inbound_htlc_value_in_flight_percent_of_channel = 100;
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let node_chanmgrs = create_node_chanmgrs(
+		3, &node_cfgs, &[Some(user_cfg.clone()), Some(lsp_cfg.clone()), Some(user_cfg.clone())]
+	);
+	let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	create_unannounced_chan_between_nodes_with_value(&nodes, 0, 1, 200_000, 0);
+	create_unannounced_chan_between_nodes_with_value(&nodes, 0, 1, 300_000, 0);
+	create_unannounced_chan_between_nodes_with_value(&nodes, 1, 2, 600_000, 0);
+
+	let amt_msat = 350_000_000;
+	let invoice_params = crate::ln::channelmanager::Bolt11InvoiceParameters {
+		amount_msats: Some(amt_msat),
+		..Default::default()
+	};
+	let invoice = nodes[2].node.create_bolt11_invoice(invoice_params).unwrap();
+	let route_params_cfg = crate::routing::router::RouteParametersConfig::default();
+
+	nodes[0].node.pay_for_bolt11_invoice(&invoice, PaymentId([42; 32]), None, route_params_cfg, Retry::Attempts(0)).unwrap();
+
+	assert!(nodes[0].node.list_recent_payments().len() == 1);
+	check_added_monitors(&nodes[0], 2); // one monitor update per MPP part
+	nodes[0].node.get_and_clear_pending_msg_events();
+}
diff --git a/lightning/src/routing/router.rs b/lightning/src/routing/router.rs
index 14d06355bc0..bc4210b5c59 100644
--- a/lightning/src/routing/router.rs
+++ b/lightning/src/routing/router.rs
@@ -2043,9 +2043,10 @@ impl<'a> PaymentPath<'a> {
 	// that it the value being transferred has decreased while we were doing path finding, leading
 	// to the fees being paid not lining up with the actual limits.
 	//
-	// Note that this function is not aware of the available_liquidity limit, and thus does not
-	// support increasing the value being transferred beyond what was selected during the initial
-	// routing passes.
+	// This function may also be used to increase the value being transferred in the case that
+	// overestimating later hops' fees caused us to underutilize earlier hops' capacity.
+	//
+	// Note that this function is not aware of the available_liquidity limit of any hops.
 	//
 	// Returns the amount that this path contributes to the total payment value, which may be greater
 	// than `value_msat` if we had to overpay to meet the final node's `htlc_minimum_msat`.
@@ -2110,15 +2111,56 @@ impl<'a> PaymentPath<'a> {
 					cur_hop.hop_use_fee_msat = new_fee;
 					total_fee_paid_msat += new_fee;
 				} else {
-					// It should not be possible because this function is called only to reduce the
-					// value. In that case, compute_fee was already called with the same fees for
-					// larger amount and there was no overflow.
+					// It should not be possible because this function is only called either to reduce the
+					// value or with a larger amount that was already checked for overflow in
+					// `compute_max_final_value_contribution`. In the former case, compute_fee was already
+					// called with the same fees for larger amount and there was no overflow.
 					unreachable!();
 				}
 			}
 		}
 		value_msat + extra_contribution_msat
 	}
+
+	// Returns the maximum contribution that this path can make to the final value of the payment. May
+	// be slightly lower than the actual max due to rounding errors when aggregating fees along the
+	// path.
+	fn compute_max_final_value_contribution(
+		&self, used_liquidities: &HashMap<CandidateHopId, u64>, channel_saturation_pow_half: u8
+	) -> u64 {
+		let mut max_path_contribution = u64::MAX;
+		for (idx, (hop, _)) in self.hops.iter().enumerate() {
+			let hop_effective_capacity_msat = hop.candidate.effective_capacity();
+			let hop_max_msat = max_htlc_from_capacity(
+				hop_effective_capacity_msat, channel_saturation_pow_half
+			).saturating_sub(*used_liquidities.get(&hop.candidate.id()).unwrap_or(&0_u64));
+
+			let next_hops_feerates_iter = self.hops
+				.iter()
+				.skip(idx + 1)
+				.map(|(hop, _)| hop.candidate.fees());
+
+			// Aggregate the fees of the hops that come after this one, and use those fees to compute the
+			// maximum amount that this hop can contribute to the final value received by the payee.
+			let (next_hops_aggregated_base, next_hops_aggregated_prop) =
+				crate::blinded_path::payment::compute_aggregated_base_prop_fee(next_hops_feerates_iter).unwrap();
+
+			// ceil(((hop_max_msat - agg_base) * 1_000_000) / (1_000_000 + agg_prop))
+			let hop_max_final_value_contribution = (hop_max_msat as u128)
+				.checked_sub(next_hops_aggregated_base as u128)
+				.and_then(|f| f.checked_mul(1_000_000))
+				.and_then(|f| f.checked_add(1_000_000 - 1))
+				.and_then(|f| f.checked_add(next_hops_aggregated_prop as u128))
+				.map(|f| f / ((next_hops_aggregated_prop as u128).saturating_add(1_000_000)));
+
+			if let Some(hop_contribution) = hop_max_final_value_contribution {
+				let hop_contribution: u64 = hop_contribution.try_into().unwrap_or(u64::MAX);
+				max_path_contribution = core::cmp::min(hop_contribution, max_path_contribution);
+			} else { debug_assert!(false); }
+		}
+
+		max_path_contribution
+	}
 }
 
 #[inline(always)]
@@ -3269,7 +3311,10 @@ where L::Target: Logger {
 				// recompute the fees again, so that if that's the case, we match the currently
 				// underpaid htlc_minimum_msat with fees.
 				debug_assert_eq!(payment_path.get_value_msat(), value_contribution_msat);
-				let desired_value_contribution = cmp::min(value_contribution_msat, final_value_msat);
+				let max_path_contribution_msat = payment_path.compute_max_final_value_contribution(
+					&used_liquidities, channel_saturation_pow_half
+				);
+				let desired_value_contribution = cmp::min(max_path_contribution_msat, final_value_msat);
 				value_contribution_msat = payment_path.update_value_and_recompute_fees(desired_value_contribution);
 
 				// Since a path allows to transfer as much value as
@@ -3281,7 +3326,6 @@ where L::Target: Logger {
 				// might have been computed considering a larger value.
 				// Remember that we used these channels so that we don't rely
 				// on the same liquidity in future paths.
-				let mut prevented_redundant_path_selection = false;
 				for (hop, _) in payment_path.hops.iter() {
 					let spent_on_hop_msat = value_contribution_msat + hop.next_hops_fee_msat;
 					let used_liquidity_msat = used_liquidities
@@ -3290,14 +3334,9 @@ where L::Target: Logger {
 						.or_insert(spent_on_hop_msat);
 					let hop_capacity = hop.candidate.effective_capacity();
 					let hop_max_msat = max_htlc_from_capacity(hop_capacity, channel_saturation_pow_half);
-					if *used_liquidity_msat == hop_max_msat {
-						// If this path used all of this channel's available liquidity, we know
-						// this path will not be selected again in the next loop iteration.
-						prevented_redundant_path_selection = true;
-					}
 					debug_assert!(*used_liquidity_msat <= hop_max_msat);
 				}
-				if !prevented_redundant_path_selection {
+				if max_path_contribution_msat > value_contribution_msat {
 					// If we weren't capped by hitting a liquidity limit on a channel in the path,
 					// we'll probably end up picking the same path again on the next iteration.
 					// Decrease the available liquidity of a hop in the middle of the path.

From 056d5da7653cebead90a346c0537cbe41c1b864c Mon Sep 17 00:00:00 2001
From: Matt Corallo <git@bluematt.me>
Date: Thu, 10 Apr 2025 20:46:57 +0000
Subject: [PATCH 2/2] Log which hop in a path was the most limiting in capacity

Its generally rather difficult to debug the pathfinding logic from
a log, and sadly because we cannot feasibly log each step in a
pathfinding search there's relatively few options we have for
improving this.

However, one specific question we occasionally get is "why did the
pathfinder decide to use MPP"? While we similarly cannot
practically log every possible path the pathfinder could have taken
to explain why a specific path which required MPP was taken, we can
at least explain which hop in the path was the most limited, which
we do here.
---
 lightning/src/routing/router.rs | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/lightning/src/routing/router.rs b/lightning/src/routing/router.rs
index bc4210b5c59..c73a35f4e1e 100644
--- a/lightning/src/routing/router.rs
+++ b/lightning/src/routing/router.rs
@@ -2122,13 +2122,14 @@ impl<'a> PaymentPath<'a> {
 		value_msat + extra_contribution_msat
 	}
 
-	// Returns the maximum contribution that this path can make to the final value of the payment. May
-	// be slightly lower than the actual max due to rounding errors when aggregating fees along the
-	// path.
-	fn compute_max_final_value_contribution(
+	/// Returns the hop which most limited our maximum contribution as well as the maximum
+	/// contribution this path can make to the final value of the payment.
+	/// May be slightly lower than the actual max due to rounding errors when aggregating fees
+	/// along the path.
+	fn max_final_value_msat(
 		&self, used_liquidities: &HashMap<CandidateHopId, u64>, channel_saturation_pow_half: u8
-	) -> u64 {
-		let mut max_path_contribution = u64::MAX;
+	) -> (usize, u64) {
+		let mut max_path_contribution = (0, u64::MAX);
 		for (idx, (hop, _)) in self.hops.iter().enumerate() {
 			let hop_effective_capacity_msat = hop.candidate.effective_capacity();
 			let hop_max_msat = max_htlc_from_capacity(
@@ -2155,7 +2156,9 @@ impl<'a> PaymentPath<'a> {
 
 			if let Some(hop_contribution) = hop_max_final_value_contribution {
 				let hop_contribution: u64 = hop_contribution.try_into().unwrap_or(u64::MAX);
-				max_path_contribution = core::cmp::min(hop_contribution, max_path_contribution);
+				if hop_contribution <= max_path_contribution.1 {
+					max_path_contribution = (idx, hop_contribution);
+				}
 			} else { debug_assert!(false); }
 		}
 
@@ -3311,9 +3314,8 @@ where L::Target: Logger {
 				// recompute the fees again, so that if that's the case, we match the currently
 				// underpaid htlc_minimum_msat with fees.
 				debug_assert_eq!(payment_path.get_value_msat(), value_contribution_msat);
-				let max_path_contribution_msat = payment_path.compute_max_final_value_contribution(
-					&used_liquidities, channel_saturation_pow_half
-				);
+				let (lowest_value_contrib_hop, max_path_contribution_msat) =
+					payment_path.max_final_value_msat(&used_liquidities, channel_saturation_pow_half);
 				let desired_value_contribution = cmp::min(max_path_contribution_msat, final_value_msat);
 				value_contribution_msat = payment_path.update_value_and_recompute_fees(desired_value_contribution);
 
@@ -3349,6 +3351,8 @@ where L::Target: Logger {
 						*used_liquidities.entry(CandidateHopId::Clear((scid, false))).or_default() = exhausted;
 						*used_liquidities.entry(CandidateHopId::Clear((scid, true))).or_default() = exhausted;
 					}
+				} else {
+					log_trace!(logger, "Path was limited to {}msat by hop {}", max_path_contribution_msat, lowest_value_contrib_hop);
 				}
 
 				// Track the total amount all our collected paths allow to send so that we know