Skip to content

Add in Hourly Partition Support #224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ dist/

# Ignore PyCharm / IntelliJ files
.idea/
build/
.python-version
docker-compose.yml
11 changes: 11 additions & 0 deletions docs/source/table_partitioning.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,17 @@ Time-based partitioning
count=12,
),
),

# 24 partitions ahead, each partition is 1 hour, for a total of 24 hours, starting at hour 0 of the current day
# old partitions are never deleted, `max_age` is not set
# partitions will be named `[table_name]_[year]_[month]_[month day number]_[hour (24h)]:00:00`.
PostgresPartitioningConfig(
model=MyPartitionedModel,
strategy=PostgresCurrentTimePartitioningStrategy(
size=PostgresTimePartitionSize(hours=1),
count=24,
),
),
])


Expand Down
3 changes: 2 additions & 1 deletion psqlextra/partitioning/current_time_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class PostgresCurrentTimePartitioningStrategy(

All buckets will be equal in size and start at the start of the
unit. With monthly partitioning, partitions start on the 1st and
with weekly partitioning, partitions start on monday.
with weekly partitioning, partitions start on Monday, and with hourly
partitioning, partitions start at 00:00 of the current day.
"""

def __init__(
Expand Down
6 changes: 5 additions & 1 deletion psqlextra/partitioning/shorthands.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def partition_by_current_time(
months: Optional[int] = None,
weeks: Optional[int] = None,
days: Optional[int] = None,
hours: Optional[int] = None,
max_age: Optional[relativedelta] = None,
name_format: Optional[str] = None,
) -> PostgresPartitioningConfig:
Expand Down Expand Up @@ -43,6 +44,9 @@ def partition_by_current_time(
days:
The amount of days each partition should contain.

hours:
The amount of hours each partition should contain.

max_age:
The maximum age of a partition (calculated from the
start of the partition).
Expand All @@ -56,7 +60,7 @@ def partition_by_current_time(
"""

size = PostgresTimePartitionSize(
years=years, months=months, weeks=weeks, days=days
years=years, months=months, weeks=weeks, days=days, hours=hours
)

return PostgresPartitioningConfig(
Expand Down
5 changes: 3 additions & 2 deletions psqlextra/partitioning/time_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class PostgresTimePartition(PostgresRangePartition):
PostgresTimePartitionUnit.MONTHS: "%Y_%b",
PostgresTimePartitionUnit.WEEKS: "%Y_week_%W",
PostgresTimePartitionUnit.DAYS: "%Y_%b_%d",
PostgresTimePartitionUnit.HOURS: "%Y_%b_%d_%H:00:00",
}

def __init__(
Expand All @@ -31,8 +32,8 @@ def __init__(
end_datetime = start_datetime + size.as_delta()

super().__init__(
from_values=start_datetime.strftime("%Y-%m-%d"),
to_values=end_datetime.strftime("%Y-%m-%d"),
from_values=start_datetime.strftime("%Y-%m-%d %H:00:00"),
to_values=end_datetime.strftime("%Y-%m-%d %H:00:00"),
)

self.size = size
Expand Down
17 changes: 14 additions & 3 deletions psqlextra/partitioning/time_partition_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class PostgresTimePartitionUnit(enum.Enum):
MONTHS = "months"
WEEKS = "weeks"
DAYS = "days"
HOURS = "hours"


class PostgresTimePartitionSize:
Expand All @@ -27,8 +28,9 @@ def __init__(
months: Optional[int] = None,
weeks: Optional[int] = None,
days: Optional[int] = None,
hours: Optional[int] = None,
) -> None:
sizes = [years, months, weeks, days]
sizes = [years, months, weeks, days, hours]

if not any(sizes):
raise PostgresPartitioningError("Partition cannot be 0 in size.")
Expand All @@ -50,6 +52,9 @@ def __init__(
elif days:
self.unit = PostgresTimePartitionUnit.DAYS
self.value = days
elif hours:
self.unit = PostgresTimePartitionUnit.HOURS
self.value = hours
else:
raise PostgresPartitioningError(
"Unsupported time partitioning unit"
Expand All @@ -68,6 +73,9 @@ def as_delta(self) -> relativedelta:
if self.unit == PostgresTimePartitionUnit.DAYS:
return relativedelta(days=self.value)

if self.unit == PostgresTimePartitionUnit.HOURS:
return relativedelta(hours=self.value)

raise PostgresPartitioningError(
"Unsupported time partitioning unit: %s" % self.unit
)
Expand All @@ -82,11 +90,14 @@ def start(self, dt: datetime) -> datetime:
if self.unit == PostgresTimePartitionUnit.WEEKS:
return self._ensure_datetime(dt - relativedelta(days=dt.weekday()))

return self._ensure_datetime(dt)
if self.unit == PostgresTimePartitionUnit.DAYS:
return self._ensure_datetime(dt)

return self._ensure_datetime(dt.replace(hour=0))

@staticmethod
def _ensure_datetime(dt: Union[date, datetime]) -> datetime:
return datetime(year=dt.year, month=dt.month, day=dt.day)
return datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour)

def __repr__(self) -> str:
return "PostgresTimePartitionSize<%s, %s>" % (self.unit, self.value)
Expand Down
91 changes: 91 additions & 0 deletions tests/test_partitioning_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,56 @@ def test_partitioning_time_daily_apply():
assert table.partitions[6].name == "2019_jun_04"


@pytest.mark.postgres_version(lt=110000)
def test_partitioning_time_hourly_apply():
    """Checks that hourly partitions are created ahead of time and that
    re-planning only adds the partitions that are still missing."""

    fake_model = define_fake_partitioned_model(
        {"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
    )

    connection.schema_editor().create_partitioned_model(fake_model)

    def apply_plan(frozen_at, count):
        # plan and apply `count` one-hour partitions as if the current
        # time were `frozen_at`, then report the resulting partition names
        with freezegun.freeze_time(frozen_at):
            config = partition_by_current_time(
                fake_model, hours=1, count=count
            )
            PostgresPartitioningManager([config]).plan().apply()

        partitions = _get_partitioned_table(fake_model).partitions
        return [partition.name for partition in partitions]

    # create partitions for the next 4 hours (including the current)
    names = apply_plan("2019-1-23", 4)
    assert names == [
        "2019_jan_23_00:00:00",
        "2019_jan_23_01:00:00",
        "2019_jan_23_02:00:00",
        "2019_jan_23_03:00:00",
    ]

    # asking for 5 afterwards should add exactly one more partition
    names = apply_plan("2019-1-23", 5)
    assert len(names) == 5
    assert names[4] == "2019_jan_23_04:00:00"

    # months later, in june, partition two hours ahead
    names = apply_plan("2019-06-03", 2)
    assert len(names) == 7
    assert names[5:] == ["2019_jun_03_00:00:00", "2019_jun_03_01:00:00"]


@pytest.mark.postgres_version(lt=110000)
def test_partitioning_time_monthly_apply_insert():
"""Tests whether automatically created monthly partitions line up
Expand Down Expand Up @@ -372,10 +422,51 @@ def test_partitioning_time_daily_apply_insert():
model.objects.create(timestamp=datetime.date(2019, 1, 10))


@pytest.mark.postgres_version(lt=110000)
def test_partitioning_time_hourly_apply_insert():
    """Tests whether automatically created hourly partitions line up
    perfectly.

    Rows for hours covered by a partition must insert cleanly. Rows for
    hours past the last partition must be rejected until the plan is
    extended to cover them.
    """

    model = define_fake_partitioned_model(
        {"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
    )

    schema_editor = connection.schema_editor()
    schema_editor.create_partitioned_model(model)

    # partitions start at hour 0 of the frozen day, so two partitions
    # cover hours 0 and 1
    with freezegun.freeze_time("2019-1-07"):
        manager = PostgresPartitioningManager(
            [partition_by_current_time(model, hours=1, count=2)]
        )
        manager.plan().apply()

    table = _get_partitioned_table(model)
    assert len(table.partitions) == 2

    model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 0))
    model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 1))

    # each rejected insert gets its own block: a statement placed after
    # a raising call inside `pytest.raises` would never be executed
    for hour in (2, 3):
        with transaction.atomic():
            with pytest.raises(IntegrityError):
                model.objects.create(
                    timestamp=datetime.datetime(2019, 1, 7, hour)
                )

    # extending the plan to 4 partitions makes hours 2 and 3 insertable
    with freezegun.freeze_time("2019-1-07"):
        manager = PostgresPartitioningManager(
            [partition_by_current_time(model, hours=1, count=4)]
        )
        manager.plan().apply()

    model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2))
    model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3))


@pytest.mark.postgres_version(lt=110000)
@pytest.mark.parametrize(
"kwargs,partition_names",
[
(dict(hours=2), ["2019_jan_01_00:00:00", "2019_jan_01_02:00:00"]),
(dict(days=2), ["2019_jan_01", "2019_jan_03"]),
(dict(weeks=2), ["2018_week_53", "2019_week_02"]),
(dict(months=2), ["2019_jan", "2019_mar"]),
Expand Down