From 93e645daf46f85949ae0edf60d36bf21e9fde8af Mon Sep 17 00:00:00 2001
From: Matt Corallo <git@bluematt.me>
Date: Wed, 6 Jul 2022 19:17:40 +0000
Subject: [PATCH 1/3] Track channels which a given payment part failed to
 traverse

When an HTLC fails, we currently rely on the scorer learning the
failed channel and assigning an infinite (`u64::max_value()`)
penalty to the channel so as to avoid retrying over the exact same
path (if there's only one available path). This is common when
trying to pay a mobile client behind an LSP if the mobile client is
currently offline.

This leads to the scorer being overly conservative in some cases -
returning `u64::max_value()` when a given path hasn't been tried
for a given payment may not be the best decision, even if that
channel failed 50 minutes ago.

By tracking channels which failed on a payment part level and
explicitly refusing to route over them we can relax the
requirements on the scorer, allowing it to make different decisions
on how to treat channels that failed relatively recently without
causing payments to retry the same path forever.

This does have the drawback that it could allow two separate part
of a payment to traverse the same path even though that path just
failed, however this should only occur if the payment is going to
fail anyway, at least as long as the scorer is properly learning.

Closes #1241, superseding #1252.
---
 lightning/src/ln/channelmanager.rs        |  9 +++-
 lightning/src/ln/functional_test_utils.rs |  5 ++-
 lightning/src/routing/router.rs           | 54 ++++++++++++++++++++---
 3 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index bae408d29f0..611e8a562c6 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -3894,7 +3894,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 					return;
 				}
 				mem::drop(channel_state_lock);
-				let retry = if let Some(payment_params_data) = payment_params {
+				let mut retry = if let Some(payment_params_data) = payment_params {
 					let path_last_hop = path.last().expect("Outbound payments must have had a valid path");
 					Some(RouteParameters {
 						payment_params: payment_params_data.clone(),
@@ -3930,6 +3930,9 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 							// TODO: If we decided to blame ourselves (or one of our channels) in
 							// process_onion_failure we should close that channel as it implies our
 							// next-hop is needlessly blaming us!
+							if let Some(scid) = short_channel_id {
+								retry.as_mut().map(|r| r.payment_params.previously_failed_channels.push(scid));
+							}
 							events::Event::PaymentPathFailed {
 								payment_id: Some(payment_id),
 								payment_hash: payment_hash.clone(),
@@ -3959,6 +3962,8 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 						// ChannelDetails.
 						// TODO: For non-temporary failures, we really should be closing the
 						// channel here as we apparently can't relay through them anyway.
+						let scid = path.first().unwrap().short_channel_id;
+						retry.as_mut().map(|r| r.payment_params.previously_failed_channels.push(scid));
 						events::Event::PaymentPathFailed {
 							payment_id: Some(payment_id),
 							payment_hash: payment_hash.clone(),
@@ -3966,7 +3971,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 							network_update: None,
 							all_paths_failed,
 							path: path.clone(),
-							short_channel_id: Some(path.first().unwrap().short_channel_id),
+							short_channel_id: Some(scid),
 							retry,
 #[cfg(test)]
 							error_code: Some(*failure_code),
diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs
index fe78395e2f9..a49e711b808 100644
--- a/lightning/src/ln/functional_test_utils.rs
+++ b/lightning/src/ln/functional_test_utils.rs
@@ -1492,7 +1492,7 @@ pub fn expect_payment_failed_conditions<'a, 'b, 'c, 'd, 'e>(
 	let mut events = node.node.get_and_clear_pending_events();
 	assert_eq!(events.len(), 1);
 	let expected_payment_id = match events.pop().unwrap() {
-		Event::PaymentPathFailed { payment_hash, rejected_by_dest, path, retry, payment_id, network_update,
+		Event::PaymentPathFailed { payment_hash, rejected_by_dest, path, retry, payment_id, network_update, short_channel_id,
 			#[cfg(test)]
 			error_code,
 			#[cfg(test)]
@@ -1502,6 +1502,9 @@ pub fn expect_payment_failed_conditions<'a, 'b, 'c, 'd, 'e>(
 			assert!(retry.is_some(), "expected retry.is_some()");
 			assert_eq!(retry.as_ref().unwrap().final_value_msat, path.last().unwrap().fee_msat, "Retry amount should match last hop in path");
 			assert_eq!(retry.as_ref().unwrap().payment_params.payee_pubkey, path.last().unwrap().pubkey, "Retry payee node_id should match last hop in path");
+			if let Some(scid) = short_channel_id {
+				assert!(retry.as_ref().unwrap().payment_params.previously_failed_channels.contains(&scid));
+			}
 
 			#[cfg(test)]
 			{
diff --git a/lightning/src/routing/router.rs b/lightning/src/routing/router.rs
index 27a21a1899e..a97e1b6ffa4 100644
--- a/lightning/src/routing/router.rs
+++ b/lightning/src/routing/router.rs
@@ -240,6 +240,11 @@ pub struct PaymentParameters {
 	///
 	/// Default value: 1
 	pub max_channel_saturation_power_of_half: u8,
+
+	/// A list of SCIDs which this payment was previously attempted over and which caused the
+	/// payment to fail. Future attempts for the same payment shouldn't be relayed through any of
+	/// these SCIDs.
+	pub previously_failed_channels: Vec<u64>,
 }
 
 impl_writeable_tlv_based!(PaymentParameters, {
@@ -250,6 +255,7 @@ impl_writeable_tlv_based!(PaymentParameters, {
 	(4, route_hints, vec_type),
 	(5, max_channel_saturation_power_of_half, (default_value, 1)),
 	(6, expiry_time, option),
+	(7, previously_failed_channels, vec_type),
 });
 
 impl PaymentParameters {
@@ -263,6 +269,7 @@ impl PaymentParameters {
 			max_total_cltv_expiry_delta: DEFAULT_MAX_TOTAL_CLTV_EXPIRY_DELTA,
 			max_path_count: DEFAULT_MAX_PATH_COUNT,
 			max_channel_saturation_power_of_half: 1,
+			previously_failed_channels: Vec::new(),
 		}
 	}
 
@@ -1002,7 +1009,7 @@ where L::Target: Logger {
 					let contributes_sufficient_value = available_value_contribution_msat >= minimal_value_contribution_msat;
 					// Do not consider candidate hops that would exceed the maximum path length.
 					let path_length_to_node = $next_hops_path_length + 1;
-					let doesnt_exceed_max_path_length = path_length_to_node <= MAX_PATH_LENGTH_ESTIMATE;
+					let exceeds_max_path_length = path_length_to_node > MAX_PATH_LENGTH_ESTIMATE;
 
 					// Do not consider candidates that exceed the maximum total cltv expiry limit.
 					// In order to already account for some of the privacy enhancing random CLTV
@@ -1013,7 +1020,7 @@ where L::Target: Logger {
 						.unwrap_or(payment_params.max_total_cltv_expiry_delta - final_cltv_expiry_delta);
 					let hop_total_cltv_delta = ($next_hops_cltv_delta as u32)
 						.saturating_add($candidate.cltv_expiry_delta());
-					let doesnt_exceed_cltv_delta_limit = hop_total_cltv_delta <= max_total_cltv_expiry_delta;
+					let exceeds_cltv_delta_limit = hop_total_cltv_delta > max_total_cltv_expiry_delta;
 
 					let value_contribution_msat = cmp::min(available_value_contribution_msat, $next_hops_value_contribution);
 					// Includes paying fees for the use of the following channels.
@@ -1033,15 +1040,19 @@ where L::Target: Logger {
 						 (amount_to_transfer_over_msat < $next_hops_path_htlc_minimum_msat &&
 						  recommended_value_msat > $next_hops_path_htlc_minimum_msat));
 
+					let payment_failed_on_this_channel =
+						payment_params.previously_failed_channels.contains(&short_channel_id);
+
 					// If HTLC minimum is larger than the amount we're going to transfer, we shouldn't
 					// bother considering this channel. If retrying with recommended_value_msat may
 					// allow us to hit the HTLC minimum limit, set htlc_minimum_limit so that we go
 					// around again with a higher amount.
-					if contributes_sufficient_value && doesnt_exceed_max_path_length &&
-						doesnt_exceed_cltv_delta_limit && may_overpay_to_meet_path_minimum_msat {
+					if !contributes_sufficient_value || exceeds_max_path_length ||
+						exceeds_cltv_delta_limit || payment_failed_on_this_channel {
+						// Path isn't useful, ignore it and move on.
+					} else if may_overpay_to_meet_path_minimum_msat {
 						hit_minimum_limit = true;
-					} else if contributes_sufficient_value && doesnt_exceed_max_path_length &&
-						doesnt_exceed_cltv_delta_limit && over_path_minimum_msat {
+					} else if over_path_minimum_msat {
 						// Note that low contribution here (limited by available_liquidity_msat)
 						// might violate htlc_minimum_msat on the hops which are next along the
 						// payment path (upstream to the payee). To avoid that, we recompute
@@ -1993,6 +2004,8 @@ mod tests {
 	use prelude::*;
 	use sync::{self, Arc};
 
+	use core::convert::TryInto;
+
 	fn get_channel_details(short_channel_id: Option<u64>, node_id: PublicKey,
 			features: InitFeatures, outbound_capacity_msat: u64) -> channelmanager::ChannelDetails {
 		channelmanager::ChannelDetails {
@@ -5573,6 +5586,35 @@ mod tests {
 		}
 	}
 
+	#[test]
+	fn avoids_recently_failed_paths() {
+		// Ensure that the router always avoids all of the `previously_failed_channels` channels by
+		// randomly inserting channels into it until we can't find a route anymore.
+		let (secp_ctx, network, _, _, logger) = build_graph();
+		let (_, our_id, _, nodes) = get_nodes(&secp_ctx);
+		let network_graph = network.read_only();
+
+		let scorer = test_utils::TestScorer::with_penalty(0);
+		let mut payment_params = PaymentParameters::from_node_id(nodes[6]).with_route_hints(last_hops(&nodes))
+			.with_max_path_count(1);
+		let keys_manager = test_utils::TestKeysInterface::new(&[0u8; 32], Network::Testnet);
+		let random_seed_bytes = keys_manager.get_secure_random_bytes();
+
+		// We should be able to find a route initially, and then after we fail a few random
+		// channels eventually we won't be able to any longer.
+		assert!(get_route(&our_id, &payment_params, &network_graph, None, 100, 0, Arc::clone(&logger), &scorer, &random_seed_bytes).is_ok());
+		loop {
+			if let Ok(route) = get_route(&our_id, &payment_params, &network_graph, None, 100, 0, Arc::clone(&logger), &scorer, &random_seed_bytes) {
+				for chan in route.paths[0].iter() {
+					assert!(!payment_params.previously_failed_channels.contains(&chan.short_channel_id));
+				}
+				let victim = (u64::from_ne_bytes(random_seed_bytes[0..8].try_into().unwrap()) as usize)
+					% route.paths[0].len();
+				payment_params.previously_failed_channels.push(route.paths[0][victim].short_channel_id);
+			} else { break; }
+		}
+	}
+
 	#[test]
 	fn limits_path_length() {
 		let (secp_ctx, network, _, _, logger) = build_line_graph();

From ec2b1ced077c879b0f51ef4fcd0b5d2d22aa570d Mon Sep 17 00:00:00 2001
From: Matt Corallo <git@bluematt.me>
Date: Wed, 6 Jul 2022 21:05:54 +0000
Subject: [PATCH 2/3] Make the `ProbabilisticScorer` impossibility penalty
 configurable

When we consider sending an HTLC over a given channel impossible
due to our current knowledge of the channel's liquidity, we
currently always assign a penalty of `u64::max_value()`. However,
because we now refuse to retry a payment along the same path in
the router itself, we can now make this value configurable. This
allows users to have a relatively high knowledge decay interval
without the side-effect of refusing to try the only available path
in cases where a channel is intermittently available.
---
 lightning/src/routing/scoring.rs | 55 ++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/lightning/src/routing/scoring.rs b/lightning/src/routing/scoring.rs
index 4922fad7847..cd2ae6d5d01 100644
--- a/lightning/src/routing/scoring.rs
+++ b/lightning/src/routing/scoring.rs
@@ -394,6 +394,25 @@ pub struct ProbabilisticScoringParameters {
 	///
 	/// Default value: 250 msat
 	pub anti_probing_penalty_msat: u64,
+
+	/// This penalty is applied when the amount we're attempting to send over a channel exceeds our
+	/// current estimate of the channel's available liquidity.
+	///
+	/// Note that in this case all other penalties, including the
+	/// [`liquidity_penalty_multiplier_msat`] and [`amount_penalty_multiplier_msat`]-based
+	/// penalties, as well as the [`base_penalty_msat`] and the [`anti_probing_penalty_msat`], if
+	/// applicable, are still included in the overall penalty.
+	///
+	/// If you wish to avoid creating paths with such channels entirely, setting this to a value of
+	/// `u64::max_value()` will guarantee that.
+	///
+	/// Default value: `u64::max_value()`
+	///
+	/// [`liquidity_penalty_multiplier_msat`]: Self::liquidity_penalty_multiplier_msat
+	/// [`amount_penalty_multiplier_msat`]: Self::amount_penalty_multiplier_msat
+	/// [`base_penalty_msat`]: Self::base_penalty_msat
+	/// [`anti_probing_penalty_msat`]: Self::anti_probing_penalty_msat
+	pub considered_impossible_penalty_msat: u64,
 }
 
 /// Accounting for channel liquidity balance uncertainty.
@@ -522,6 +541,7 @@ impl ProbabilisticScoringParameters {
 			amount_penalty_multiplier_msat: 0,
 			manual_node_penalties: HashMap::new(),
 			anti_probing_penalty_msat: 0,
+			considered_impossible_penalty_msat: 0,
 		}
 	}
 
@@ -543,6 +563,7 @@ impl Default for ProbabilisticScoringParameters {
 			amount_penalty_multiplier_msat: 256,
 			manual_node_penalties: HashMap::new(),
 			anti_probing_penalty_msat: 250,
+			considered_impossible_penalty_msat: u64::max_value(),
 		}
 	}
 }
@@ -620,17 +641,12 @@ impl<L: Deref<Target = u64>, T: Time, U: Deref<Target = T>> DirectedChannelLiqui
 		if amount_msat <= min_liquidity_msat {
 			0
 		} else if amount_msat >= max_liquidity_msat {
-			if amount_msat > max_liquidity_msat {
-				u64::max_value()
-			} else if max_liquidity_msat != self.capacity_msat {
-				// Avoid using the failed channel on retry.
-				u64::max_value()
-			} else {
-				// Equivalent to hitting the else clause below with the amount equal to the
-				// effective capacity and without any certainty on the liquidity upper bound.
-				let negative_log10_times_2048 = NEGATIVE_LOG10_UPPER_BOUND * 2048;
-				self.combined_penalty_msat(amount_msat, negative_log10_times_2048, params)
-			}
+			// Equivalent to hitting the else clause below with the amount equal to the effective
+			// capacity and without any certainty on the liquidity upper bound, plus the
+			// impossibility penalty.
+			let negative_log10_times_2048 = NEGATIVE_LOG10_UPPER_BOUND * 2048;
+			self.combined_penalty_msat(amount_msat, negative_log10_times_2048, params)
+				.saturating_add(params.considered_impossible_penalty_msat)
 		} else {
 			let numerator = (max_liquidity_msat - amount_msat).saturating_add(1);
 			let denominator = (max_liquidity_msat - min_liquidity_msat).saturating_add(1);
@@ -1624,7 +1640,7 @@ mod tests {
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 0);
 		let usage = ChannelUsage { amount_msat: 102_400, ..usage };
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 47);
-		let usage = ChannelUsage { amount_msat: 1_024_000, ..usage };
+		let usage = ChannelUsage { amount_msat: 1_023_999, ..usage };
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 2_000);
 
 		let usage = ChannelUsage {
@@ -1654,6 +1670,7 @@ mod tests {
 		let network_graph = network_graph(&logger);
 		let params = ProbabilisticScoringParameters {
 			liquidity_penalty_multiplier_msat: 1_000,
+			considered_impossible_penalty_msat: u64::max_value(),
 			..ProbabilisticScoringParameters::zero_penalty()
 		};
 		let scorer = ProbabilisticScorer::new(params, &network_graph, &logger)
@@ -1745,6 +1762,7 @@ mod tests {
 		let network_graph = network_graph(&logger);
 		let params = ProbabilisticScoringParameters {
 			liquidity_penalty_multiplier_msat: 1_000,
+			considered_impossible_penalty_msat: u64::max_value(),
 			..ProbabilisticScoringParameters::zero_penalty()
 		};
 		let mut scorer = ProbabilisticScorer::new(params, &network_graph, &logger);
@@ -1811,6 +1829,7 @@ mod tests {
 		let params = ProbabilisticScoringParameters {
 			liquidity_penalty_multiplier_msat: 1_000,
 			liquidity_offset_half_life: Duration::from_secs(10),
+			considered_impossible_penalty_msat: u64::max_value(),
 			..ProbabilisticScoringParameters::zero_penalty()
 		};
 		let mut scorer = ProbabilisticScorer::new(params, &network_graph, &logger);
@@ -1820,10 +1839,10 @@ mod tests {
 		let usage = ChannelUsage {
 			amount_msat: 0,
 			inflight_htlc_msat: 0,
-			effective_capacity: EffectiveCapacity::Total { capacity_msat: 1_024, htlc_maximum_msat: Some(1_000) },
+			effective_capacity: EffectiveCapacity::Total { capacity_msat: 1_024, htlc_maximum_msat: Some(1_024) },
 		};
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 0);
-		let usage = ChannelUsage { amount_msat: 1_024, ..usage };
+		let usage = ChannelUsage { amount_msat: 1_023, ..usage };
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 2_000);
 
 		scorer.payment_path_failed(&payment_path_for_amount(768).iter().collect::<Vec<_>>(), 42);
@@ -1867,20 +1886,20 @@ mod tests {
 		let usage = ChannelUsage { amount_msat: 1_023, ..usage };
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 2_000);
 		let usage = ChannelUsage { amount_msat: 1_024, ..usage };
-		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 2_000);
+		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), u64::max_value());
 
 		// Fully decay liquidity upper bound.
 		SinceEpoch::advance(Duration::from_secs(10));
 		let usage = ChannelUsage { amount_msat: 0, ..usage };
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 0);
 		let usage = ChannelUsage { amount_msat: 1_024, ..usage };
-		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 2_000);
+		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), u64::max_value());
 
 		SinceEpoch::advance(Duration::from_secs(10));
 		let usage = ChannelUsage { amount_msat: 0, ..usage };
 		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 0);
 		let usage = ChannelUsage { amount_msat: 1_024, ..usage };
-		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), 2_000);
+		assert_eq!(scorer.channel_penalty_msat(42, &source, &target, usage), u64::max_value());
 	}
 
 	#[test]
@@ -1965,6 +1984,7 @@ mod tests {
 		let params = ProbabilisticScoringParameters {
 			liquidity_penalty_multiplier_msat: 1_000,
 			liquidity_offset_half_life: Duration::from_secs(10),
+			considered_impossible_penalty_msat: u64::max_value(),
 			..ProbabilisticScoringParameters::zero_penalty()
 		};
 		let mut scorer = ProbabilisticScorer::new(params.clone(), &network_graph, &logger);
@@ -2001,6 +2021,7 @@ mod tests {
 		let params = ProbabilisticScoringParameters {
 			liquidity_penalty_multiplier_msat: 1_000,
 			liquidity_offset_half_life: Duration::from_secs(10),
+			considered_impossible_penalty_msat: u64::max_value(),
 			..ProbabilisticScoringParameters::zero_penalty()
 		};
 		let mut scorer = ProbabilisticScorer::new(params.clone(), &network_graph, &logger);

From a3547e29e5409413a0864b72813ac9c57bbd6d6f Mon Sep 17 00:00:00 2001
From: Matt Corallo <git@bluematt.me>
Date: Thu, 7 Jul 2022 17:37:22 +0000
Subject: [PATCH 3/3] Change default "impossibility penalty" to one Bitcoin

In general we should avoid taking paths that we are confident will
not work as much possible, but we should be willing to try each
payment at least once, even if its over a channel that failed
recently. A full Bitcoin penalty for such a channel seems
reasonable - lightning fees are unlikely to ever reach that point
so such channels will be scored much worse than any other potential
path, while still being below `u64::max_value()`.
---
 lightning/src/routing/scoring.rs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lightning/src/routing/scoring.rs b/lightning/src/routing/scoring.rs
index cd2ae6d5d01..e2e0bdd18a6 100644
--- a/lightning/src/routing/scoring.rs
+++ b/lightning/src/routing/scoring.rs
@@ -406,7 +406,7 @@ pub struct ProbabilisticScoringParameters {
 	/// If you wish to avoid creating paths with such channels entirely, setting this to a value of
 	/// `u64::max_value()` will guarantee that.
 	///
-	/// Default value: `u64::max_value()`
+	/// Default value: 1_0000_0000_000 msat (1 Bitcoin)
 	///
 	/// [`liquidity_penalty_multiplier_msat`]: Self::liquidity_penalty_multiplier_msat
 	/// [`amount_penalty_multiplier_msat`]: Self::amount_penalty_multiplier_msat
@@ -563,7 +563,7 @@ impl Default for ProbabilisticScoringParameters {
 			amount_penalty_multiplier_msat: 256,
 			manual_node_penalties: HashMap::new(),
 			anti_probing_penalty_msat: 250,
-			considered_impossible_penalty_msat: u64::max_value(),
+			considered_impossible_penalty_msat: 1_0000_0000_000,
 		}
 	}
 }
@@ -2192,7 +2192,10 @@ mod tests {
 	fn accounts_for_inflight_htlc_usage() {
 		let logger = TestLogger::new();
 		let network_graph = network_graph(&logger);
-		let params = ProbabilisticScoringParameters::default();
+		let params = ProbabilisticScoringParameters {
+			considered_impossible_penalty_msat: u64::max_value(),
+			..ProbabilisticScoringParameters::zero_penalty()
+		};
 		let scorer = ProbabilisticScorer::new(params, &network_graph, &logger);
 		let source = source_node_id();
 		let target = target_node_id();