-
Notifications
You must be signed in to change notification settings - Fork 7
Retention-based Partition Dropping #44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
8f6b927
Add command: drop, to calculate partition drops based on retention pe…
jcjones cc9c7e9
Deduplicate methods that moved into database_helpers
jcjones 0675e33
Add database helper tests
jcjones 84c61eb
Add dropper tests
jcjones d19f7c1
More test cleanups
jcjones 2313825
Merge branch 'main' into age_analysis
jcjones 41f2e3d
Update to PyLint 2.17.7 to fix Python11
jcjones cc9bd01
More tests
jcjones 44799fb
pytlint needs pytest
jcjones f4d546e
Add an assertion for correct ordering of partitions
jcjones File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" | ||
Helper functions for database operations | ||
""" | ||
|
||
from datetime import datetime, timezone | ||
import logging | ||
|
||
import partitionmanager.table_append_partition as pm_tap | ||
import partitionmanager.types | ||
|
||
|
||
def get_position_of_table(database, table, map_data):
    """Build a Position holding the table's current range-column values.

    The current value of every column named in map_data["range_cols"] is
    fetched with pm_tap.get_current_positions, and those values are stored
    in the returned Position in the same order as the column list.
    """
    range_columns = map_data["range_cols"]
    current_values = pm_tap.get_current_positions(database, table, range_columns)

    position = partitionmanager.types.Position()

    # Keep the values in range-column order, not in the lookup's own order.
    ordered_values = []
    for column in range_columns:
        ordered_values.append(current_values[column])
    position.set_position(ordered_values)

    return position
|
||
|
||
def calculate_exact_timestamp_via_query(database, table, position_partition):
    """Calculate the exact timestamp of a PositionPartition via a query.

    The table's earliest_utc_timestamp_query is filled in with the
    partition's single position value and executed with database.run().
    The one value in the one returned row is interpreted as seconds since
    the Unix epoch.

    Args:
        database: object whose run(sql) executes the statement and returns
            a sequence of row mappings.
        table: provides name, has_date_query and
            earliest_utc_timestamp_query.
        position_partition: the PositionPartition to date; it must have
            exactly one position column.

    Returns:
        A timezone-aware datetime in UTC.

    Raises:
        ValueError: if the table has no date query, the partition is not a
            PositionPartition, or it spans more than one column.
        NoExactTimeException: if the query does not produce exactly one row
            holding exactly one non-None value.
    """
    log = logging.getLogger(f"calculate_exact_timestamp_via_query:{table.name}")

    if not table.has_date_query:
        raise ValueError("Table has no defined date query")

    if not isinstance(position_partition, partitionmanager.types.PositionPartition):
        raise ValueError("Only PositionPartitions are supported")

    if len(position_partition.position) != 1:
        raise ValueError(
            "This method is only valid for single-column partitions right now"
        )
    arg = position_partition.position.as_sql_input()[0]

    sql_select_cmd = table.earliest_utc_timestamp_query.get_statement_with_argument(arg)
    log.debug(
        "Executing %s to derive partition %s at position %s",
        sql_select_cmd,
        position_partition.name,
        position_partition.position,
    )

    # Timed only so the debug log below can report how long the query took.
    start = datetime.now()
    exact_time_result = database.run(sql_select_cmd)
    end = datetime.now()

    if len(exact_time_result) != 1:
        raise partitionmanager.types.NoExactTimeException("No exact timestamp result")

    row = exact_time_result[0]
    if len(row) != 1:
        raise partitionmanager.types.NoExactTimeException(
            "Unexpected column count for the timestamp result"
        )

    # The column name depends on the configured query, so take the single
    # value without relying on its key.
    value = next(iter(row.values()))
    if value is None:
        # A NULL result cannot be converted; report it like any other
        # unusable result instead of leaking a TypeError from fromtimestamp.
        raise partitionmanager.types.NoExactTimeException(
            "Timestamp query returned no value"
        )
    exact_time = datetime.fromtimestamp(value, tz=timezone.utc)

    log.debug(
        "Exact time of %s returned for %s at position %s, query took %s",
        exact_time,
        position_partition.name,
        position_partition.position,
        (end - start),
    )
    return exact_time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import unittest | ||
|
||
from .database_helpers import get_position_of_table, calculate_exact_timestamp_via_query | ||
|
||
from .types import ( | ||
DatabaseCommand, | ||
NoExactTimeException, | ||
PositionPartition, | ||
SqlInput, | ||
SqlQuery, | ||
Table, | ||
) | ||
|
||
|
||
class MockDatabase(DatabaseCommand):
    """DatabaseCommand stand-in that replays queued responses first-in, first-out."""

    def __init__(self):
        # New entries go in at index 0 and are taken from the end, so the
        # oldest queued response is always the next one served.
        self._responses = []
        self.num_queries = 0

    def add_response(self, expected, response):
        """Queue `response` for a later command that contains `expected`."""
        entry = {"expected": expected, "response": response}
        self._responses.insert(0, entry)

    def run(self, cmd):
        """Count the call and return the next queued response.

        Raises Exception when nothing is queued, or when `cmd` does not
        contain the expected text of the next queued entry.
        """
        self.num_queries += 1
        if not self._responses:
            raise Exception(f"No mock responses available for cmd [{cmd}]")

        queued = self._responses.pop()
        if queued["expected"] not in cmd:
            raise Exception(
                f"Received command [{cmd}] and expected [{queued['expected']}]"
            )
        return queued["response"]

    def db_name(self):
        """Return the fixed database name used by the tests."""
        return SqlInput("the-database")
|
||
|
||
class TestDatabaseHelpers(unittest.TestCase):
    """Tests for get_position_of_table and calculate_exact_timestamp_via_query."""

    @staticmethod
    def _burgers_table_with_date_query():
        """Return the `burgers` Table with the timestamp query the tests share."""
        table = Table("burgers")
        table.set_earliest_utc_timestamp_query(
            SqlQuery(
                "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` "
                "WHERE `type` = \"burger\" AND `id` > '?' ORDER BY `id` ASC LIMIT 1;"
            )
        )
        return table

    def test_position_of_table(self):
        db = MockDatabase()
        db.add_response("SELECT id FROM `burgers` ORDER BY", [{"id": 90210}])

        table = Table("burgers")
        data = {"range_cols": ["id"]}

        pos = get_position_of_table(db, table, data)
        self.assertEqual(pos.as_list(), [90210])

    def test_exact_timestamp_no_query(self):
        db = MockDatabase()
        db.add_response("SELECT id FROM `burgers` ORDER BY", [{"id": 42}])

        table = Table("burgers")
        self.assertFalse(table.has_date_query)

        pos = PositionPartition("p_start")
        pos.set_position([42])

        with self.assertRaises(ValueError):
            calculate_exact_timestamp_via_query(db, table, pos)

        # The missing date query must be rejected before any SQL is issued.
        self.assertEqual(db.num_queries, 0)

    def test_exact_timestamp(self):
        db = MockDatabase()
        db.add_response(
            "SELECT UNIX_TIMESTAMP(`cooked`)", [{"UNIX_TIMESTAMP": 17541339060}]
        )

        table = self._burgers_table_with_date_query()

        pos = PositionPartition("p_start")
        pos.set_position([150])

        ts = calculate_exact_timestamp_via_query(db, table, pos)
        # assertEqual rather than a bare assert: it is not stripped under -O
        # and matches the other checks in this class.
        self.assertEqual(f"{ts}", "2525-11-11 18:11:00+00:00")

    def test_no_exact_timestamp(self):
        db = MockDatabase()
        # Two rows where exactly one is required.
        db.add_response(
            "SELECT UNIX_TIMESTAMP(`cooked`)",
            [{"UNIX_TIMESTAMP": 17541339060}, {"UNIX_TIMESTAMP": 17541339070}],
        )

        table = self._burgers_table_with_date_query()

        pos = PositionPartition("p_start")
        pos.set_position([150])

        with self.assertRaises(NoExactTimeException):
            calculate_exact_timestamp_via_query(db, table, pos)

        # One row, but two columns where exactly one is required.
        db.add_response(
            "SELECT UNIX_TIMESTAMP(`cooked`)",
            [{"UNIX_TIMESTAMP": 17541339060, "column2": True}],
        )

        with self.assertRaises(NoExactTimeException):
            calculate_exact_timestamp_via_query(db, table, pos)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.