Skip to content

Commit 719a4c2

Browse files
committed
Handle mispredictions cleanly
In the real world, we might mess up when naming a partition. This should be rare if partitionmanager is running often, since it'll rename partitions to match reality, but when it's running only rarely, things get out of date. This change avoids attempting to calculate rates-of-change using partitions that don't make sense - e.g., today is July 1, and our active partition says it starts in a week. That is plainly wrong, but we can still use our current rate-of-change. This expands on PR #12 by changing what the start-datetime is for new partitions after we mispredicted. Without this change, if we had partitions through to December, but it's only August and we need more, the new partitions would be named for January instead of reflecting the reality that they need to be named for right now. This also catches a bug where we could get timestamp name collisions. That will be a lot less of an issue once I implement Tim's suggestion in #19, but for now this just increases dates by a day to avoid a collision, and that works well.
1 parent 51e10be commit 719a4c2

File tree

3 files changed

+66
-32
lines changed

3 files changed

+66
-32
lines changed

partitionmanager/cli_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ def test_partition_cmd_noop(self):
7979
"sql": (
8080
"ALTER TABLE `testtable_noop` REORGANIZE PARTITION "
8181
"`p_20201204` INTO "
82-
"(PARTITION `p_20201205` VALUES LESS THAN (548), "
83-
"PARTITION `p_20210104` VALUES LESS THAN MAXVALUE);"
82+
"(PARTITION `p_20201112` VALUES LESS THAN (548), "
83+
"PARTITION `p_20201212` VALUES LESS THAN MAXVALUE);"
8484
),
8585
"noop": True,
8686
}
@@ -101,8 +101,8 @@ def test_partition_cmd_final(self):
101101
"sql": (
102102
"ALTER TABLE `testtable_commit` REORGANIZE PARTITION "
103103
"`p_20201204` INTO "
104-
"(PARTITION `p_20201205` VALUES LESS THAN (548), "
105-
"PARTITION `p_20210104` VALUES LESS THAN MAXVALUE);"
104+
"(PARTITION `p_20201112` VALUES LESS THAN (548), "
105+
"PARTITION `p_20201212` VALUES LESS THAN MAXVALUE);"
106106
),
107107
}
108108
},

partitionmanager/table_append_partition.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,8 @@ def _predict_forward_time(current_position, end_position, rates, evaluation_time
308308

309309
if max(days_remaining) < 0:
310310
raise ValueError(f"All values are negative: {days_remaining}")
311-
return evaluation_time + (max(days_remaining) * timedelta(days=1))
311+
calculated = evaluation_time + (max(days_remaining) * timedelta(days=1))
312+
return calculated.replace(minute=0, second=0, microsecond=0)
312313

313314

314315
def _calculate_start_time(last_changed_time, evaluation_time, allowed_lifespan):
@@ -323,7 +324,7 @@ def _calculate_start_time(last_changed_time, evaluation_time, allowed_lifespan):
323324
if partition_start_time < evaluation_time:
324325
# Partition start times should never be in the past.
325326
return evaluation_time
326-
return partition_start_time
327+
return partition_start_time.replace(minute=0, second=0, microsecond=0)
327328

328329

329330
def _plan_partition_changes(
@@ -378,10 +379,13 @@ def _plan_partition_changes(
378379
# to exclude the future-dated, irrelevant partition.
379380
log.debug(
380381
f"Misprediction: Evaluation time ({evaluation_time}) is "
381-
f"before the active partition {active_partition}. Excluding from "
382-
"rate calculations."
382+
f"before the active partition {active_partition}. Excluding "
383+
"mispredicted partitions from the rate calculations."
383384
)
384-
rate_relevant_partitions = filled_partitions + [
385+
filled_partitions = filter(
386+
lambda f: f.timestamp() < evaluation_time, filled_partitions
387+
)
388+
rate_relevant_partitions = list(filled_partitions) + [
385389
partitionmanager.types.InstantPartition(evaluation_time, current_position)
386390
]
387391

@@ -403,16 +407,16 @@ def _plan_partition_changes(
403407

404408
changed_partition = partitionmanager.types.ChangePlannedPartition(partition)
405409

410+
start_of_fill_time = _predict_forward_time(
411+
current_position, last_changed.position, rates, evaluation_time
412+
)
413+
406414
if isinstance(partition, partitionmanager.types.PositionPartition):
407415
# We can't change the position on this partition, but we can adjust
408416
# the name to be more exact as to what date we expect it to begin
409417
# filling. If we calculate the start-of-fill date and it doesn't
410418
# match the partition's name, let's rename it and mark it as an
411419
# important change.
412-
start_of_fill_time = _predict_forward_time(
413-
current_position, last_changed.position, rates, evaluation_time
414-
)
415-
416420
if start_of_fill_time.date() != partition.timestamp().date():
417421
log.info(
418422
f"Start-of-fill predicted at {start_of_fill_time.date()} "
@@ -427,15 +431,21 @@ def _plan_partition_changes(
427431
# we calculate forward what position we expect and use it in the
428432
# future.
429433

430-
partition_start_time = _calculate_start_time(
434+
nominal_partition_start_time = _calculate_start_time(
431435
last_changed.timestamp(), evaluation_time, allowed_lifespan
432436
)
437+
438+
# We use the nearest timestamp, which should generally be the
439+
# calculated time, but could be the fill time based on predicting
440+
# forward if we have gotten far off in our predictions in the past.
441+
changed_partition.set_timestamp(
442+
min(nominal_partition_start_time, start_of_fill_time)
443+
)
444+
433445
changed_part_pos = _predict_forward_position(
434446
last_changed.position.as_list(), rates, allowed_lifespan
435447
)
436-
changed_partition.set_position(changed_part_pos).set_timestamp(
437-
partition_start_time
438-
)
448+
changed_partition.set_position(changed_part_pos)
439449

440450
results.append(changed_partition)
441451

@@ -455,6 +465,27 @@ def _plan_partition_changes(
455465
.set_timestamp(partition_start_time)
456466
)
457467

468+
# Confirm we won't make timestamp conflicts
469+
existing_timestamps = list(map(lambda p: p.timestamp(), partition_list))
470+
conflict_found = True
471+
while conflict_found:
472+
conflict_found = False
473+
for partition in results:
474+
if partition.timestamp() in existing_timestamps:
475+
if (
476+
isinstance(partition, partitionmanager.types.ChangePlannedPartition)
477+
and partition.timestamp() == partition.old.timestamp()
478+
):
479+
# That's not a conflict
480+
continue
481+
482+
log.debug(
483+
f"{partition} has a conflict for its timestamp, increasing by 1 day."
484+
)
485+
partition.set_timestamp(partition.timestamp() + timedelta(days=1))
486+
conflict_found = True
487+
break
488+
458489
# Final result is always MAXVALUE
459490
results[-1].set_as_max_value()
460491

partitionmanager/table_append_partition_test.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -565,16 +565,16 @@ def test_plan_partition_changes_wildly_off_dates(self):
565565
)
566566

567567
self.assertEqual(
568-
planned,
569568
[
570569
ChangePlannedPartition(mkPPart("p_20201231", 100)),
571570
ChangePlannedPartition(mkPPart("p_20210104", 200))
572571
.set_timestamp(datetime(2021, 1, 2, tzinfo=timezone.utc))
573572
.set_important(),
574573
ChangePlannedPartition(mkTailPart("future")).set_timestamp(
575-
datetime(2021, 1, 9, tzinfo=timezone.utc)
574+
datetime(2021, 1, 5, tzinfo=timezone.utc)
576575
),
577576
],
577+
planned,
578578
)
579579

580580
def test_plan_partition_changes_long_delay(self):
@@ -604,6 +604,7 @@ def test_plan_partition_changes_long_delay(self):
604604
)
605605

606606
def test_plan_partition_changes_short_names(self):
607+
self.maxDiff = None
607608
planned = _plan_partition_changes(
608609
[
609610
mkPPart("p_2019", 1912499867),
@@ -679,6 +680,7 @@ def test_plan_partition_changes_bespoke_names(self):
679680
)
680681

681682
def test_plan_partition_changes(self):
683+
self.maxDiff = None
682684
planned = _plan_partition_changes(
683685
[
684686
mkPPart("p_20201231", 100),
@@ -697,7 +699,7 @@ def test_plan_partition_changes(self):
697699
ChangePlannedPartition(mkPPart("p_20201231", 100)),
698700
ChangePlannedPartition(mkPPart("p_20210102", 200)),
699701
ChangePlannedPartition(mkTailPart("future")).set_timestamp(
700-
datetime(2021, 1, 9, tzinfo=timezone.utc)
702+
datetime(2021, 1, 4, tzinfo=timezone.utc)
701703
),
702704
],
703705
)
@@ -718,19 +720,20 @@ def test_plan_partition_changes(self):
718720
ChangePlannedPartition(mkPPart("p_20210102", 200)).set_position([200]),
719721
ChangePlannedPartition(mkTailPart("future"))
720722
.set_position([320])
721-
.set_timestamp(datetime(2021, 1, 9, tzinfo=timezone.utc)),
723+
.set_timestamp(datetime(2021, 1, 3, tzinfo=timezone.utc)),
722724
NewPlannedPartition()
723725
.set_position([440])
724-
.set_timestamp(datetime(2021, 1, 16, tzinfo=timezone.utc)),
726+
.set_timestamp(datetime(2021, 1, 10, tzinfo=timezone.utc)),
725727
NewPlannedPartition()
726728
.set_columns(1)
727-
.set_timestamp(datetime(2021, 1, 23, tzinfo=timezone.utc)),
729+
.set_timestamp(datetime(2021, 1, 17, tzinfo=timezone.utc)),
728730
],
729731
)
730732

731733
def test_plan_partition_changes_misprediction(self):
732734
""" We have to handle the case where the partition list doesn't cleanly
733735
match reality. """
736+
self.maxDiff = None
734737
planned = _plan_partition_changes(
735738
[
736739
mkPPart("p_20210505", 9505010028),
@@ -748,15 +751,15 @@ def test_plan_partition_changes_misprediction(self):
748751
planned,
749752
[
750753
ChangePlannedPartition(mkPPart("p_20210704", 10799505006)),
751-
ChangePlannedPartition(mkTailPart("p_20210803")).set_position(
752-
[11578057459]
753-
),
754+
ChangePlannedPartition(mkTailPart("p_20210803"))
755+
.set_position([11578057459])
756+
.set_timestamp(datetime(2021, 6, 28, tzinfo=timezone.utc)),
754757
NewPlannedPartition()
755758
.set_position([12356609912])
756-
.set_timestamp(datetime(2021, 9, 2, tzinfo=timezone.utc)),
759+
.set_timestamp(datetime(2021, 7, 28, tzinfo=timezone.utc)),
757760
NewPlannedPartition()
758761
.set_columns(1)
759-
.set_timestamp(datetime(2021, 10, 2, tzinfo=timezone.utc)),
762+
.set_timestamp(datetime(2021, 8, 27, tzinfo=timezone.utc)),
760763
],
761764
)
762765

@@ -984,7 +987,7 @@ def test_plan_andgenerate_sql_reorganize_partition_commands_with_future_partitio
984987
list(generate_sql_reorganize_partition_commands(Table("water"), planned)),
985988
[
986989
"ALTER TABLE `water` REORGANIZE PARTITION `future` INTO "
987-
"(PARTITION `p_20210109` VALUES LESS THAN MAXVALUE);",
990+
"(PARTITION `p_20210105` VALUES LESS THAN MAXVALUE);",
988991
"ALTER TABLE `water` REORGANIZE PARTITION `p_20210104` INTO "
989992
"(PARTITION `p_20210102` VALUES LESS THAN (200));",
990993
],
@@ -1046,9 +1049,9 @@ def test_get_pending_sql_reorganize_partition_commands_with_changes(self):
10461049
list(cmds),
10471050
[
10481051
"ALTER TABLE `plushies` REORGANIZE PARTITION `future` INTO "
1049-
"(PARTITION `p_20210109` VALUES LESS THAN (550), "
1050-
"PARTITION `p_20210116` VALUES LESS THAN (900), "
1051-
"PARTITION `p_20210123` VALUES LESS THAN MAXVALUE);"
1052+
"(PARTITION `p_20210104` VALUES LESS THAN (550), "
1053+
"PARTITION `p_20210111` VALUES LESS THAN (900), "
1054+
"PARTITION `p_20210118` VALUES LESS THAN MAXVALUE);"
10521055
],
10531056
)
10541057

0 commit comments

Comments
 (0)