Skip to content

Commit 5d3bce6

Browse files
committed
Fix problem with covid_hosp skipping state revisions.
Includes a migration to run after deploy and before the next acquisition.
1 parent 44ff896 commit 5d3bce6

File tree

8 files changed

+30
-15
lines changed

8 files changed

+30
-15
lines changed

src/acquisition/covid_hosp/common/database.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class Database:
1919
def __init__(self,
2020
connection,
2121
table_name=None,
22+
dataset_name=None,
2223
columns_and_types=None,
2324
key_columns=None,
2425
additional_fields=None):
@@ -40,6 +41,7 @@ def __init__(self,
4041

4142
self.connection = connection
4243
self.table_name = table_name
44+
self.dataset_name = dataset_name
4345
self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \
4446
'publication_date'
4547
self.columns_and_types = {
@@ -116,7 +118,7 @@ def contains_revision(self, revision):
116118
`covid_hosp_meta`
117119
WHERE
118120
`dataset_name` = %s AND `revision_timestamp` = %s
119-
''', (self.table_name, revision))
121+
''', (self.dataset_name, revision))
120122
for (result,) in cursor:
121123
return bool(result)
122124

@@ -145,7 +147,7 @@ def insert_metadata(self, publication_date, revision, meta_json):
145147
)
146148
VALUES
147149
(%s, %s, %s, %s, NOW())
148-
''', (self.table_name, publication_date, revision, meta_json))
150+
''', (self.dataset_name, publication_date, revision, meta_json))
149151

150152
def insert_dataset(self, publication_date, dataframe):
151153
"""Add a dataset to the database.
@@ -232,7 +234,7 @@ def get_max_issue(self):
232234
from
233235
`covid_hosp_meta`
234236
WHERE
235-
dataset_name = "{self.table_name}"
237+
dataset_name = "{self.dataset_name}"
236238
''')
237239
for (result,) in cursor:
238240
if result is not None:

src/acquisition/covid_hosp/common/utils.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,19 +169,20 @@ def update_dataset(database, network, newer_than=None, older_than=None):
169169
# download the dataset and add it to the database
170170
dataset = Utils.merge_by_key_cols([network.fetch_dataset(url) for url, _ in revisions],
171171
db.KEY_COLS)
172-
# add metadata to the database using the last revision seen.
173-
last_url, last_index = revisions[-1]
174-
metadata_json = metadata.loc[last_index].reset_index().to_json()
172+
# add metadata to the database
173+
all_metadata = []
174+
for url, index in revisions:
175+
all_metadata.append((url, metadata.loc[index].reset_index().to_json()))
175176
datasets.append((
176177
issue_int,
177178
dataset,
178-
last_url,
179-
metadata_json
179+
all_metadata
180180
))
181181
with database.connect() as db:
182-
for issue_int, dataset, last_url, metadata_json in datasets:
182+
for issue_int, dataset, all_metadata in datasets:
183183
db.insert_dataset(issue_int, dataset)
184-
db.insert_metadata(issue_int, last_url, metadata_json)
184+
for url, metadata_json in all_metadata:
185+
db.insert_metadata(issue_int, url, metadata_json)
185186
print(f'successfully acquired {len(dataset)} rows')
186187

187188
# note that the transaction is committed by exiting the `with` block

src/acquisition/covid_hosp/facility/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
33
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
44
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
5+
from delphi.epidata.acquisition.covid_hosp.facility.network import Network
56

67

78
class Database(BaseDatabase):
@@ -213,5 +214,6 @@ def __init__(self, *args, **kwargs):
213214
*args,
214215
**kwargs,
215216
table_name=Database.TABLE_NAME,
217+
dataset_name=Network.DATASET_ID,
216218
key_columns=Database.KEY_COLS,
217219
columns_and_types=Database.ORDERED_CSV_COLUMNS)

src/acquisition/covid_hosp/state_daily/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
33
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
44
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
5+
from delphi.epidata.acquisition.covid_hosp.state_daily.network import Network
56

67

78
class Database(BaseDatabase):
@@ -223,6 +224,7 @@ def __init__(self, *args, **kwargs):
223224
*args,
224225
**kwargs,
225226
table_name=Database.TABLE_NAME,
227+
dataset_name=Network.DATASET_ID,
226228
columns_and_types=Database.ORDERED_CSV_COLUMNS,
227229
key_columns=Database.KEY_COLS,
228230
additional_fields=[Columndef('D', 'record_type', None)])

src/acquisition/covid_hosp/state_timeseries/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
33
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
44
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
5+
from delphi.epidata.acquisition.covid_hosp.state_timeseries.network import Network
56

67

78
class Database(BaseDatabase):
@@ -222,6 +223,7 @@ def __init__(self, *args, **kwargs):
222223
*args,
223224
**kwargs,
224225
table_name=Database.TABLE_NAME,
226+
dataset_name=Network.DATASET_ID,
225227
columns_and_types=Database.ORDERED_CSV_COLUMNS,
226228
key_columns=Database.KEY_COLS,
227229
additional_fields=[Columndef('T', 'record_type', None)])
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Re-key existing metadata: every row whose revision_timestamp contains the
-- dataset id 'g62h-syeh' gets that id as its dataset_name.
-- Single-quoted literals are used so the statement also works when the
-- ANSI_QUOTES SQL mode is on (double quotes would then denote identifiers).
UPDATE covid_hosp_meta SET dataset_name = 'g62h-syeh' WHERE revision_timestamp LIKE '%g62h-syeh%';
2+
-- Re-key existing metadata: every row whose revision_timestamp contains the
-- dataset id '6xf2-c3ie' gets that id as its dataset_name.
-- Single-quoted literals are used so the statement also works when the
-- ANSI_QUOTES SQL mode is on (double quotes would then denote identifiers).
UPDATE covid_hosp_meta SET dataset_name = '6xf2-c3ie' WHERE revision_timestamp LIKE '%6xf2-c3ie%';
3+
-- Re-key existing metadata: every row whose revision_timestamp contains the
-- dataset id 'anag-cw7u' gets that id as its dataset_name.
-- Single-quoted literals are used so the statement also works when the
-- ANSI_QUOTES SQL mode is on (double quotes would then denote identifiers).
UPDATE covid_hosp_meta SET dataset_name = 'anag-cw7u' WHERE revision_timestamp LIKE '%anag-cw7u%';

tests/acquisition/covid_hosp/common/test_database.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_contains_revision(self):
6868

6969
mock_connection = MagicMock()
7070
mock_cursor = mock_connection.cursor()
71-
database = Database(mock_connection, table_name=sentinel.table_name)
71+
database = Database(mock_connection, table_name=sentinel.table_name, dataset_name=sentinel.dataset_name)
7272

7373
with self.subTest(name='new revision'):
7474
mock_cursor.__iter__.return_value = [(0,)]
@@ -78,7 +78,7 @@ def test_contains_revision(self):
7878
# compare with boolean literal to test the type cast
7979
self.assertIs(result, False)
8080
query_values = mock_cursor.execute.call_args[0][-1]
81-
self.assertEqual(query_values, (sentinel.table_name, sentinel.revision))
81+
self.assertEqual(query_values, (sentinel.dataset_name, sentinel.revision))
8282

8383
with self.subTest(name='old revision'):
8484
mock_cursor.__iter__.return_value = [(1,)]
@@ -88,7 +88,7 @@ def test_contains_revision(self):
8888
# compare with boolean literal to test the type cast
8989
self.assertIs(result, True)
9090
query_values = mock_cursor.execute.call_args[0][-1]
91-
self.assertEqual(query_values, (sentinel.table_name, sentinel.revision))
91+
self.assertEqual(query_values, (sentinel.dataset_name, sentinel.revision))
9292

9393
def test_insert_metadata(self):
9494
"""Add new metadata to the database."""
@@ -98,7 +98,7 @@ def test_insert_metadata(self):
9898

9999
mock_connection = MagicMock()
100100
mock_cursor = mock_connection.cursor()
101-
database = Database(mock_connection, table_name=sentinel.dataset_name)
101+
database = Database(mock_connection, table_name=sentinel.table_name, dataset_name=sentinel.dataset_name)
102102

103103
result = database.insert_metadata(
104104
sentinel.publication_date,

tests/acquisition/covid_hosp/common/test_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,10 @@ def test_run_acquire_new_dataset(self):
129129

130130
self.assertTrue(result)
131131

132-
mock_connection.insert_metadata.assert_called_once()
132+
# should have been called twice
133+
mock_connection.insert_metadata.assert_called()
134+
assert mock_connection.insert_metadata.call_count == 2
135+
# most recent call should be for the final revision at url2
133136
args = mock_connection.insert_metadata.call_args[0]
134137
self.assertEqual(args[:2], (20210315, "url2"))
135138
pd.testing.assert_frame_equal(

0 commit comments

Comments
 (0)