Skip to content

Commit 477b750

Browse files
authored
Merge pull request #1 from Toruitas/feature/hourly_partitions
Feature/hourly partitions
2 parents e5503cb + c329d85 commit 477b750

File tree

7 files changed

+130
-7
lines changed

7 files changed

+130
-7
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,6 @@ dist/
2828

2929
# Ignore PyCharm / IntelliJ files
3030
.idea/
31+
build/
32+
.python-version
33+
docker-compose.yml

docs/source/table_partitioning.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,17 @@ Time-based partitioning
174174
count=12,
175175
),
176176
),
177+
178+
# 24 partitions ahead, each partition is 1 hour
179+
# old partitions are never deleted, `max_age` is not set
180+
# partitions will be named `[table_name]_[year]_[month]_[month day number]_[hour (24h)]:00:00`.
181+
PostgresPartitioningConfig(
182+
model=MyPartitionedModel,
183+
strategy=PostgresCurrentTimePartitioningStrategy(
184+
size=PostgresTimePartitionSize(hours=1),
185+
count=24,
186+
),
187+
),
177188
])
178189
179190

psqlextra/partitioning/current_time_strategy.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ class PostgresCurrentTimePartitioningStrategy(
1616
1717
All buckets will be equal in size and start at the start of the
1818
unit. With monthly partitioning, partitions start on the 1st and
19-
with weekly partitioning, partitions start on monday.
19+
with weekly partitioning, partitions start on Monday, and with hourly
20+
partitioning, partitions start at 00:00.
2021
"""
2122

2223
def __init__(

psqlextra/partitioning/shorthands.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def partition_by_current_time(
1616
months: Optional[int] = None,
1717
weeks: Optional[int] = None,
1818
days: Optional[int] = None,
19+
hours: Optional[int] = None,
1920
max_age: Optional[relativedelta] = None,
2021
name_format: Optional[str] = None,
2122
) -> PostgresPartitioningConfig:
@@ -43,6 +44,9 @@ def partition_by_current_time(
4344
days:
4445
The amount of days each partition should contain.
4546
47+
hours:
48+
The amount of hours each partition should contain.
49+
4650
max_age:
4751
The maximum age of a partition (calculated from the
4852
start of the partition).
@@ -56,7 +60,7 @@ def partition_by_current_time(
5660
"""
5761

5862
size = PostgresTimePartitionSize(
59-
years=years, months=months, weeks=weeks, days=days
63+
years=years, months=months, weeks=weeks, days=days, hours=hours
6064
)
6165

6266
return PostgresPartitioningConfig(

psqlextra/partitioning/time_partition.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class PostgresTimePartition(PostgresRangePartition):
2020
PostgresTimePartitionUnit.MONTHS: "%Y_%b",
2121
PostgresTimePartitionUnit.WEEKS: "%Y_week_%W",
2222
PostgresTimePartitionUnit.DAYS: "%Y_%b_%d",
23+
PostgresTimePartitionUnit.HOURS: "%Y_%b_%d_%H:00:00",
2324
}
2425

2526
def __init__(
@@ -31,8 +32,8 @@ def __init__(
3132
end_datetime = start_datetime + size.as_delta()
3233

3334
super().__init__(
34-
from_values=start_datetime.strftime("%Y-%m-%d"),
35-
to_values=end_datetime.strftime("%Y-%m-%d"),
35+
from_values=start_datetime.strftime("%Y-%m-%d %H:00:00"),
36+
to_values=end_datetime.strftime("%Y-%m-%d %H:00:00"),
3637
)
3738

3839
self.size = size

psqlextra/partitioning/time_partition_size.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class PostgresTimePartitionUnit(enum.Enum):
1313
MONTHS = "months"
1414
WEEKS = "weeks"
1515
DAYS = "days"
16+
HOURS = "hours"
1617

1718

1819
class PostgresTimePartitionSize:
@@ -27,8 +28,9 @@ def __init__(
2728
months: Optional[int] = None,
2829
weeks: Optional[int] = None,
2930
days: Optional[int] = None,
31+
hours: Optional[int] = None,
3032
) -> None:
31-
sizes = [years, months, weeks, days]
33+
sizes = [years, months, weeks, days, hours]
3234

3335
if not any(sizes):
3436
raise PostgresPartitioningError("Partition cannot be 0 in size.")
@@ -50,6 +52,9 @@ def __init__(
5052
elif days:
5153
self.unit = PostgresTimePartitionUnit.DAYS
5254
self.value = days
55+
elif hours:
56+
self.unit = PostgresTimePartitionUnit.HOURS
57+
self.value = hours
5358
else:
5459
raise PostgresPartitioningError(
5560
"Unsupported time partitioning unit"
@@ -68,6 +73,9 @@ def as_delta(self) -> relativedelta:
6873
if self.unit == PostgresTimePartitionUnit.DAYS:
6974
return relativedelta(days=self.value)
7075

76+
if self.unit == PostgresTimePartitionUnit.HOURS:
77+
return relativedelta(hours=self.value)
78+
7179
raise PostgresPartitioningError(
7280
"Unsupported time partitioning unit: %s" % self.unit
7381
)
@@ -81,12 +89,15 @@ def start(self, dt: datetime) -> datetime:
8189

8290
if self.unit == PostgresTimePartitionUnit.WEEKS:
8391
return self._ensure_datetime(dt - relativedelta(days=dt.weekday()))
92+
93+
if self.unit == PostgresTimePartitionUnit.DAYS:
94+
return self._ensure_datetime(dt)
8495

85-
return self._ensure_datetime(dt)
96+
return self._ensure_datetime(dt.replace(hour=0))
8697

8798
@staticmethod
8899
def _ensure_datetime(dt: Union[date, datetime]) -> datetime:
89-
return datetime(year=dt.year, month=dt.month, day=dt.day)
100+
return datetime(year=dt.year, month=dt.month, day=dt.day, hour = dt.hour)
90101

91102
def __repr__(self) -> str:
92103
return "PostgresTimePartitionSize<%s, %s>" % (self.unit, self.value)

tests/test_partitioning_time.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,57 @@ def test_partitioning_time_daily_apply():
254254
assert table.partitions[6].name == "2019_jun_04"
255255

256256

257+
258+
@pytest.mark.postgres_version(lt=110000)
259+
def test_partitioning_time_hourly_apply():
260+
"""Tests whether automatically creating new partitions ahead hourly works as
261+
expected."""
262+
263+
model = define_fake_partitioned_model(
264+
{"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
265+
)
266+
267+
schema_editor = connection.schema_editor()
268+
schema_editor.create_partitioned_model(model)
269+
270+
# create partitions for the next 4 hours (including the current)
271+
with freezegun.freeze_time("2019-1-23"):
272+
manager = PostgresPartitioningManager(
273+
[partition_by_current_time(model, hours=1, count=4)]
274+
)
275+
manager.plan().apply()
276+
277+
table = _get_partitioned_table(model)
278+
assert len(table.partitions) == 4
279+
assert table.partitions[0].name == "2019_jan_23_00:00:00"
280+
assert table.partitions[1].name == "2019_jan_23_01:00:00"
281+
assert table.partitions[2].name == "2019_jan_23_02:00:00"
282+
assert table.partitions[3].name == "2019_jan_23_03:00:00"
283+
284+
# re-running it with 5, should just create one additional partition
285+
with freezegun.freeze_time("2019-1-23"):
286+
manager = PostgresPartitioningManager(
287+
[partition_by_current_time(model, hours=1, count=5)]
288+
)
289+
manager.plan().apply()
290+
291+
table = _get_partitioned_table(model)
292+
assert len(table.partitions) == 5
293+
assert table.partitions[4].name == "2019_jan_23_04:00:00"
294+
295+
# it's june now, we want to partition two hours ahead
296+
with freezegun.freeze_time("2019-06-03"):
297+
manager = PostgresPartitioningManager(
298+
[partition_by_current_time(model, hours=1, count=2)]
299+
)
300+
manager.plan().apply()
301+
302+
table = _get_partitioned_table(model)
303+
assert len(table.partitions) == 7
304+
assert table.partitions[5].name == "2019_jun_03_00:00:00"
305+
assert table.partitions[6].name == "2019_jun_03_01:00:00"
306+
307+
257308
@pytest.mark.postgres_version(lt=110000)
258309
def test_partitioning_time_monthly_apply_insert():
259310
"""Tests whether automatically created monthly partitions line up
@@ -372,10 +423,51 @@ def test_partitioning_time_daily_apply_insert():
372423
model.objects.create(timestamp=datetime.date(2019, 1, 10))
373424

374425

426+
@pytest.mark.postgres_version(lt=110000)
427+
def test_partitioning_time_hourly_apply_insert():
428+
"""Tests whether automatically created hourly partitions line up
429+
perfectly."""
430+
431+
model = define_fake_partitioned_model(
432+
{"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
433+
)
434+
435+
schema_editor = connection.schema_editor()
436+
schema_editor.create_partitioned_model(model)
437+
438+
# that's a monday
439+
with freezegun.freeze_time("2019-1-07"):
440+
manager = PostgresPartitioningManager(
441+
[partition_by_current_time(model, hours=1, count=2)]
442+
)
443+
manager.plan().apply()
444+
445+
table = _get_partitioned_table(model)
446+
assert len(table.partitions) == 2
447+
448+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 0))
449+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7 , 1))
450+
451+
with transaction.atomic():
452+
with pytest.raises(IntegrityError):
453+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2))
454+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3))
455+
456+
with freezegun.freeze_time("2019-1-07"):
457+
manager = PostgresPartitioningManager(
458+
[partition_by_current_time(model, hours=1, count=4)]
459+
)
460+
manager.plan().apply()
461+
462+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2))
463+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3))
464+
465+
375466
@pytest.mark.postgres_version(lt=110000)
376467
@pytest.mark.parametrize(
377468
"kwargs,partition_names",
378469
[
470+
(dict(hours=2), ["2019_jan_01_00:00:00", "2019_jan_01_02:00:00"]),
379471
(dict(days=2), ["2019_jan_01", "2019_jan_03"]),
380472
(dict(weeks=2), ["2018_week_53", "2019_week_02"]),
381473
(dict(months=2), ["2019_jan", "2019_mar"]),

0 commit comments

Comments
 (0)