Skip to content

Commit 41e15e6

Browse files
committed
Improve duplicate detection in _plan_partition_changes
Fixes #46. The bug was that partition deduplication only checked planned partitions against the existing partitions; if we _planned_ to produce two partitions that duplicated each other (because the rates of change are extreme), we wouldn't fix them, leaving an exception to be thrown later during SQL generation.
1 parent ab3fc1d commit 41e15e6

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

partitionmanager/table_append_partition.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -608,10 +608,12 @@ def _plan_partition_changes(
608608
)
609609

610610
# Confirm we won't make timestamp conflicts
611-
existing_timestamps = list(map(lambda p: p.timestamp(), partition_list))
612611
conflict_found = True
613612
while conflict_found:
614613
conflict_found = False
614+
615+
existing_timestamps = set(map(lambda p: p.timestamp(), partition_list))
616+
615617
for partition in results:
616618
if partition.timestamp() in existing_timestamps:
617619
if (
@@ -628,6 +630,8 @@ def _plan_partition_changes(
628630
conflict_found = True
629631
break
630632

633+
existing_timestamps.add(partition.timestamp())
634+
631635
# Final result is always MAXVALUE
632636
results[-1].set_as_max_value()
633637

partitionmanager/table_append_partition_test.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,50 @@ def test_plan_partition_changes_misprediction(self):
801801
],
802802
)
803803

804+
def test_plan_partition_changes_misprediction_duplicate(self):
805+
"""We have to handle the case where a mispredicted rate of change
806+
calculation produces results that themselves have duplicates"""
807+
self.maxDiff = None
808+
planned = _plan_partition_changes(
809+
MockDatabase(),
810+
Table("table"),
811+
[
812+
mkPPart("p_20220419", 81567449545, 99982222560),
813+
mkPPart("p_20220519", 90007334722, 110234961540),
814+
mkPPart("p_20220520", 94841841817, 116162938085),
815+
mkPPart("p_20220521", 99676348912, 122090914630),
816+
mkPPart("p_20220522", 102672012866, 127123677707),
817+
mkTailPart("p_20220523", count=2),
818+
],
819+
mkPos(90408556246, 110749398176),
820+
datetime(2022, 5, 20, 18, 55, 16, 155, tzinfo=timezone.utc),
821+
timedelta(days=30),
822+
3,
823+
)
824+
825+
# this configuration could prompt a duplicate p_20220524 partition, which
826+
# should end up with the second being moved to 5-25
827+
828+
self.assertEqual(
829+
planned,
830+
[
831+
ChangePlannedPartition(
832+
mkPPart("p_20220520", 94841841817, 116162938085)
833+
),
834+
ChangePlannedPartition(mkPPart("p_20220521", 99676348912, 122090914630))
835+
.set_timestamp(datetime(2022, 5, 24, tzinfo=timezone.utc))
836+
.set_important(),
837+
ChangePlannedPartition(
838+
mkPPart("p_20220522", 102672012866, 127123677707)
839+
)
840+
.set_timestamp(datetime(2022, 5, 25, tzinfo=timezone.utc))
841+
.set_important(),
842+
ChangePlannedPartition(mkTailPart("p_20220523", count=2)).set_timestamp(
843+
datetime(2022, 5, 26, tzinfo=timezone.utc)
844+
),
845+
],
846+
)
847+
804848
def test_get_rate_partitions_with_implicit_timestamps(self):
805849
eval_time = datetime(2021, 6, 8, tzinfo=timezone.utc)
806850

0 commit comments

Comments
 (0)