@@ -3438,9 +3438,9 @@ def test_given_unfinished_first_parent_partition_no_parent_state_update():
3438
3438
assert mock_cursor_2 .stream_slices .call_count == 1 # Called once for each partition
3439
3439
3440
3440
assert len (cursor ._semaphore_per_partition ) == 1
3441
- assert len (cursor ._finished_partitions ) == 1
3442
- assert len (cursor ._open_seqs ) == 1
3443
- assert len (cursor ._seq_by_partition ) == 1
3441
+ assert len (cursor ._partitions_done_generating_stream_slices ) == 1
3442
+ assert len (cursor ._processing_partitions_indexes ) == 1
3443
+ assert len (cursor ._partition_key_to_index ) == 1
3444
3444
3445
3445
3446
3446
def test_given_unfinished_last_parent_partition_with_partial_parent_state_update ():
@@ -3526,9 +3526,9 @@ def test_given_unfinished_last_parent_partition_with_partial_parent_state_update
3526
3526
assert mock_cursor_2 .stream_slices .call_count == 1 # Called once for each partition
3527
3527
3528
3528
assert len (cursor ._semaphore_per_partition ) == 1
3529
- assert len (cursor ._finished_partitions ) == 1
3530
- assert len (cursor ._open_seqs ) == 1
3531
- assert len (cursor ._seq_by_partition ) == 1
3529
+ assert len (cursor ._partitions_done_generating_stream_slices ) == 1
3530
+ assert len (cursor ._processing_partitions_indexes ) == 1
3531
+ assert len (cursor ._partition_key_to_index ) == 1
3532
3532
3533
3533
3534
3534
def test_given_all_partitions_finished_when_close_partition_then_final_state_emitted ():
@@ -3606,9 +3606,9 @@ def test_given_all_partitions_finished_when_close_partition_then_final_state_emi
3606
3606
3607
3607
# Checks that all internal variables are cleaned up
3608
3608
assert len (cursor ._semaphore_per_partition ) == 0
3609
- assert len (cursor ._finished_partitions ) == 0
3610
- assert len (cursor ._open_seqs ) == 0
3611
- assert len (cursor ._seq_by_partition ) == 0
3609
+ assert len (cursor ._partitions_done_generating_stream_slices ) == 0
3610
+ assert len (cursor ._processing_partitions_indexes ) == 0
3611
+ assert len (cursor ._partition_key_to_index ) == 0
3612
3612
3613
3613
3614
3614
def test_given_partition_limit_exceeded_when_close_partition_then_switch_to_global_cursor ():
@@ -3727,8 +3727,8 @@ def test_semaphore_cleanup():
3727
3727
# Verify initial state
3728
3728
assert len (cursor ._semaphore_per_partition ) == 2
3729
3729
assert len (cursor ._partition_parent_state_map ) == 2
3730
- assert len (cursor ._open_seqs ) == 2
3731
- assert len (cursor ._seq_by_partition ) == 2
3730
+ assert len (cursor ._processing_partitions_indexes ) == 2
3731
+ assert len (cursor ._partition_key_to_index ) == 2
3732
3732
assert cursor ._partition_parent_state_map ['{"id":"1"}' ][0 ] == {"parent" : {"state" : "state1" }}
3733
3733
assert cursor ._partition_parent_state_map ['{"id":"2"}' ][0 ] == {"parent" : {"state" : "state2" }}
3734
3734
@@ -3737,10 +3737,10 @@ def test_semaphore_cleanup():
3737
3737
cursor .close_partition (DeclarativePartition ("test_stream" , {}, MagicMock (), MagicMock (), s ))
3738
3738
3739
3739
# Check state after closing partitions
3740
- assert len (cursor ._finished_partitions ) == 0
3740
+ assert len (cursor ._partitions_done_generating_stream_slices ) == 0
3741
3741
assert len (cursor ._semaphore_per_partition ) == 0
3742
- assert len (cursor ._open_seqs ) == 0
3743
- assert len (cursor ._seq_by_partition ) == 0
3742
+ assert len (cursor ._processing_partitions_indexes ) == 0
3743
+ assert len (cursor ._partition_key_to_index ) == 0
3744
3744
assert len (cursor ._partition_parent_state_map ) == 0 # All parent states should be popped
3745
3745
assert cursor ._parent_state == {"parent" : {"state" : "state2" }} # Last parent state
3746
3746
@@ -3788,3 +3788,127 @@ def test_given_global_state_when_read_then_state_is_not_per_partition() -> None:
3788
3788
"use_global_cursor" : True , # ensures that it is running the Concurrent CDK version as this is not populated in the declarative implementation
3789
3789
}, # this state does have per partition which would be under `states`
3790
3790
)
3791
+
3792
+
3793
+ def _make_inner_cursor (ts : str ) -> MagicMock :
3794
+ """Return an inner cursor that yields exactly one slice and has a proper state."""
3795
+ inner = MagicMock ()
3796
+ inner .stream_slices .return_value = iter ([{"dummy" : "slice" }])
3797
+ inner .state = {"updated_at" : ts }
3798
+ inner .close_partition .return_value = None
3799
+ inner .observe .return_value = None
3800
+ return inner
3801
+
3802
+
3803
def test_duplicate_partition_after_cleanup():
    """A partition key that reappears after being closed is not flagged as a duplicate.

    The router emits partitions 1 -> 2 -> 1. Every emitted slice is closed
    before the next one is requested; afterwards the duplication flag must
    still be unset and the per-partition bookkeeping must be empty.
    """
    timestamps = (
        "2024-01-01T00:00:00Z",  # first "1"
        "2024-01-02T00:00:00Z",  # "2"
        "2024-01-03T00:00:00Z",  # second "1"
    )
    cursor_factory_mock = MagicMock()
    cursor_factory_mock.create.side_effect = [_make_inner_cursor(ts) for ts in timestamps]

    state_converter = CustomFormatConcurrentStreamStateConverter(
        datetime_format="%Y-%m-%dT%H:%M:%SZ",
        input_datetime_formats=["%Y-%m-%dT%H:%M:%SZ"],
        is_sequential_state=True,
        cursor_granularity=timedelta(0),
    )

    cursor = ConcurrentPerPartitionCursor(
        cursor_factory=cursor_factory_mock,
        partition_router=MagicMock(),
        stream_name="dup_stream",
        stream_namespace=None,
        stream_state={},
        message_repository=MagicMock(),
        connector_state_manager=MagicMock(),
        connector_state_converter=state_converter,
        cursor_field=CursorField(cursor_field_key="updated_at"),
    )

    # NOTE(review): presumably caps the number of tracked partitions so the
    # first "1" is dropped before it reappears -- confirm against the cursor.
    cursor.DEFAULT_MAX_PARTITIONS_NUMBER = 1

    # Partition sequence: 1 -> 2 -> 1
    partitions = [
        StreamSlice(partition={"id": partition_id}, cursor_slice={}, extra_fields={})
        for partition_id in ("1", "2", "1")
    ]
    router = cursor._partition_router
    router.stream_slices.return_value = iter(partitions)
    router.get_stream_state.return_value = {}

    def _as_partition(stream_slice):
        # Wrap an emitted slice the way close_partition expects to receive it.
        return DeclarativePartition("dup_stream", {}, MagicMock(), MagicMock(), stream_slice)

    # Pull slices one at a time and close each before asking for the next, so
    # the first "1" is already closed when the second "1" arrives. The
    # generator is deliberately advanced exactly three times, never exhausted.
    slice_gen = cursor.stream_slices()
    for _ in range(len(partitions)):
        emitted = next(slice_gen)
        cursor.close_partition(_as_partition(emitted))

    assert cursor._IS_PARTITION_DUPLICATION_LOGGED is False  # No duplicate detected
    assert len(cursor._semaphore_per_partition) == 0
    assert len(cursor._processing_partitions_indexes) == 0
    assert len(cursor._partition_key_to_index) == 0
3864
+
3865
+
3866
def test_duplicate_partition_while_processing():
    """A partition key that reappears while still open is skipped and flagged.

    The router emits partitions 1 -> 2 -> 1 and all slices are generated
    before anything is closed. Only two slices are expected, the duplication
    flag must be set, and the bookkeeping must be empty once both emitted
    slices have been closed.
    """
    timestamps = (
        "2024-01-01T00:00:00Z",  # first "1"
        "2024-01-02T00:00:00Z",  # "2"
        "2024-01-03T00:00:00Z",  # second "1"
    )
    factory = MagicMock()
    factory.create.side_effect = [_make_inner_cursor(ts) for ts in timestamps]

    cursor = ConcurrentPerPartitionCursor(
        cursor_factory=factory,
        partition_router=MagicMock(),
        stream_name="dup_stream",
        stream_namespace=None,
        stream_state={},
        message_repository=MagicMock(),
        connector_state_manager=MagicMock(),
        connector_state_converter=MagicMock(),
        cursor_field=CursorField(cursor_field_key="updated_at"),
    )

    partitions = [
        StreamSlice(partition={"id": partition_id}, cursor_slice={}, extra_fields={})
        for partition_id in ("1", "2", "1")
    ]
    router = cursor._partition_router
    router.stream_slices.return_value = iter(partitions)
    router.get_stream_state.return_value = {}

    generated = list(cursor.stream_slices())
    # Only "1" and "2" emitted; the duplicate "1" is skipped
    assert len(generated) == 2

    # Close in reverse emission order: "2" first, then the initial "1"
    for emitted in reversed(generated):
        cursor.close_partition(
            DeclarativePartition("dup_stream", {}, MagicMock(), MagicMock(), emitted)
        )

    assert cursor._IS_PARTITION_DUPLICATION_LOGGED is True  # warning emitted
    assert len(cursor._cursor_per_partition) == 2
    assert len(cursor._semaphore_per_partition) == 0
    assert len(cursor._processing_partitions_indexes) == 0
    assert len(cursor._partition_key_to_index) == 0
0 commit comments