diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index 878608e14f6..0a8f258935c 100644
--- a/lightning/src/ln/chanmon_update_fail_tests.rs
+++ b/lightning/src/ln/chanmon_update_fail_tests.rs
@@ -4218,13 +4218,17 @@ fn test_glacial_peer_cant_hang() {
 	do_test_glacial_peer_cant_hang(true);
 }
 
-#[test]
-fn test_partial_claim_mon_update_compl_actions() {
+fn do_test_partial_claim_mon_update_compl_actions(reload_a: bool, reload_b: bool) {
 	// Test that if we have an MPP claim that we ensure the preimage for the claim is retained in
 	// all the `ChannelMonitor`s until the preimage reaches every `ChannelMonitor` for a channel
 	// which was a part of the MPP.
 	let chanmon_cfgs = create_chanmon_cfgs(4);
 	let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
+
+	let (persister, persister_2, persister_3);
+	let (new_chain_mon, new_chain_mon_2, new_chain_mon_3);
+	let (nodes_3_reload, nodes_3_reload_2, nodes_3_reload_3);
+
 	let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
 	let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);
 
@@ -4253,6 +4257,8 @@ fn test_partial_claim_mon_update_compl_actions() {
 	let paths = &[&[&nodes[1], &nodes[3]][..], &[&nodes[2], &nodes[3]][..]];
 	send_along_route_with_secret(&nodes[0], route, paths, 200_000, payment_hash, payment_secret);
 
+	// Store the monitor for channel 4 without the preimage to use on reload
+	let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
 	// Claim along both paths, but only complete one of the two monitor updates.
 	chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
 	chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
@@ -4264,7 +4270,13 @@ fn test_partial_claim_mon_update_compl_actions() {
 	// Complete the 1<->3 monitor update and play the commitment_signed dance forward until it
 	// blocks.
 	nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_3_id);
-	expect_payment_claimed!(&nodes[3], payment_hash, 200_000);
+	let payment_claimed = nodes[3].node.get_and_clear_pending_events();
+	assert_eq!(payment_claimed.len(), 1, "{payment_claimed:?}");
+	if let Event::PaymentClaimed { payment_hash: ev_hash, .. } = &payment_claimed[0] {
+		assert_eq!(*ev_hash, payment_hash);
+	} else {
+		panic!("{payment_claimed:?}");
+	}
 	let mut updates = get_htlc_update_msgs(&nodes[3], &node_b_id);
 
 	nodes[1].node.handle_update_fulfill_htlc(node_d_id, updates.update_fulfill_htlcs.remove(0));
@@ -4283,15 +4295,41 @@ fn test_partial_claim_mon_update_compl_actions() {
 	check_added_monitors(&nodes[3], 0);
 	assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
 
+	if reload_a {
+		// After a reload (with the monitor not yet fully updated), the RAA should still be blocked
+		// waiting until the monitor update completes.
+		let node_ser = nodes[3].node.encode();
+		let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
+		let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
+		reload_node!(nodes[3], &node_ser, mons, persister, new_chain_mon, nodes_3_reload);
+		// The final update to channel 4 should be replayed.
+		persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+		assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
+		check_added_monitors(&nodes[3], 1);
+
+		// Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on
+		// restart.
+		let second_payment_claimed = nodes[3].node.get_and_clear_pending_events();
+		assert_eq!(payment_claimed, second_payment_claimed);
+
+		nodes[1].node.peer_disconnected(node_d_id);
+		nodes[2].node.peer_disconnected(node_d_id);
+		reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
+		reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));
+
+		assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
+	}
+
 	// Now double-check that the preimage is still in the 1<->3 channel and complete the pending
 	// monitor update, allowing node 3 to claim the payment on the 2<->3 channel. This also
 	// unblocks the 1<->3 channel, allowing node 3 to release the two blocked monitor updates and
 	// respond to the final commitment_signed.
 	assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
+	assert!(nodes[3].node.get_and_clear_pending_events().is_empty());
 
 	nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_4_id);
 	let mut ds_msgs = nodes[3].node.get_and_clear_pending_msg_events();
-	assert_eq!(ds_msgs.len(), 2);
+	assert_eq!(ds_msgs.len(), 2, "{ds_msgs:?}");
 	check_added_monitors(&nodes[3], 2);
 
 	match remove_first_msg_event_to_node(&node_b_id, &mut ds_msgs) {
@@ -4335,13 +4373,86 @@ fn test_partial_claim_mon_update_compl_actions() {
 	assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
 	assert!(get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
 
+	if reload_b {
+		// Ensure that the channel pause logic doesn't accidentally get restarted after a second
+		// reload once the HTLCs for the first payment have been removed and the monitors
+		// completed.
+		let node_ser = nodes[3].node.encode();
+		let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
+		let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
+		let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
+		reload_node!(nodes[3], &node_ser, mons, persister_2, new_chain_mon_2, nodes_3_reload_2);
+		check_added_monitors(&nodes[3], 0);
+
+		nodes[1].node.peer_disconnected(node_d_id);
+		nodes[2].node.peer_disconnected(node_d_id);
+		reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
+		reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));
+
+		assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
+
+		// Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on
+		// restart.
+		let third_payment_claimed = nodes[3].node.get_and_clear_pending_events();
+		assert_eq!(payment_claimed, third_payment_claimed);
+	}
+
 	send_payment(&nodes[1], &[&nodes[3]], 100_000);
 	assert!(!get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash));
 
+	if reload_b {
+		// Ensure that the channel pause logic doesn't accidentally get restarted after a second
+		// reload once the HTLCs for the first payment have been removed and the monitors
+		// completed, even if only one of the two monitors still knows about the first payment.
+		let node_ser = nodes[3].node.encode();
+		let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode();
+		let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode();
+		let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]];
+		reload_node!(nodes[3], &node_ser, mons, persister_3, new_chain_mon_3, nodes_3_reload_3);
+		check_added_monitors(&nodes[3], 0);
+
+		nodes[1].node.peer_disconnected(node_d_id);
+		nodes[2].node.peer_disconnected(node_d_id);
+		reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3]));
+		reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3]));
+
+		assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty());
+
+		// Because the HTLCs aren't yet cleared, the PaymentClaimed events for both payments will
+		// be replayed on restart.
+		// Use this as an opportunity to check the payment_ids are unique.
+		let mut events = nodes[3].node.get_and_clear_pending_events();
+		assert_eq!(events.len(), 2);
+		events.retain(|ev| *ev != payment_claimed[0]);
+		assert_eq!(events.len(), 1);
+		if let Event::PaymentClaimed { payment_id: original_payment_id, .. } = &payment_claimed[0] {
+			assert!(original_payment_id.is_some());
+			if let Event::PaymentClaimed { amount_msat, payment_id, .. } = &events[0] {
+				assert!(payment_id.is_some());
+				assert_ne!(original_payment_id, payment_id);
+				assert_eq!(*amount_msat, 100_000);
+			} else {
+				panic!("{events:?}");
+			}
+		} else {
+			panic!("{events:?}");
+		}
+
+		send_payment(&nodes[1], &[&nodes[3]], 100_000);
+	}
+
 	send_payment(&nodes[2], &[&nodes[3]], 100_000);
 	assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
 }
 
+#[test]
+fn test_partial_claim_mon_update_compl_actions() {
+	do_test_partial_claim_mon_update_compl_actions(true, true);
+	do_test_partial_claim_mon_update_compl_actions(true, false);
+	do_test_partial_claim_mon_update_compl_actions(false, true);
+	do_test_partial_claim_mon_update_compl_actions(false, false);
+}
+
 #[test]
 fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() {
 	// One of the last features for async persistence we implemented was the correct blocking of
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 6f411273aab..7c6eb765204 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -8274,18 +8274,21 @@ where
 					// payment claim from a `ChannelMonitor`. In some cases (MPP or
 					// if the HTLC was only recently removed) we make such claims
 					// after an HTLC has been removed from a channel entirely, and
-					// thus the RAA blocker has long since completed.
+					// thus the RAA blocker may have long since completed.
 					//
-					// In any other case, the RAA blocker must still be present and
-					// blocking RAAs.
-					debug_assert!(
-						during_init
-							|| peer_state
-								.actions_blocking_raa_monitor_updates
-								.get(&chan_id)
-								.unwrap()
-								.contains(&raa_blocker)
-					);
+					// However, it's possible that the `ChannelMonitorUpdate` containing
+					// the preimage never completed and is still pending. In that case,
+					// we need to re-add the RAA blocker, which we do here. Handling
+					// the post-update action, below, will remove it again.
+					//
+					// In any other case (i.e. not during startup), the RAA blocker
+					// must still be present and blocking RAAs.
+					let actions = &mut peer_state.actions_blocking_raa_monitor_updates;
+					let actions_list = actions.entry(chan_id).or_insert_with(Vec::new);
+					if !actions_list.contains(&raa_blocker) {
+						debug_assert!(during_init);
+						actions_list.push(raa_blocker);
+					}
 				}
 				let action = if let Some(action) = action_opt {
 					action
@@ -8293,6 +8296,22 @@ where
 					return;
 				};
 
+				// If there are monitor updates in flight, we may be in the case
+				// described above, replaying a claim on startup which needs an RAA
+				// blocker to remain blocked. Thus, in such a case we simply push the
+				// post-update action to the blocked list and move on.
+				// In any case, we should err on the side of caution and not process
+				// the post-update action no matter the situation.
+				let in_flight_mons = peer_state.in_flight_monitor_updates.get(&chan_id);
+				if in_flight_mons.map(|(_, mons)| !mons.is_empty()).unwrap_or(false) {
+					peer_state
+						.monitor_update_blocked_actions
+						.entry(chan_id)
+						.or_insert_with(Vec::new)
+						.push(action);
+					return;
+				}
+
 				mem::drop(peer_state_opt);
 
 				log_trace!(logger, "Completing monitor update completion action for channel {} as claim was redundant: {:?}",