Skip to content

Commit 247a1e3

Browse files
committed
[covid_hosp] add new column instead of repurposing old column
1 parent 5d3bce6 commit 247a1e3

File tree

7 files changed

+24
-18
lines changed

7 files changed

+24
-18
lines changed

src/acquisition/covid_hosp/common/database.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class Database:
1919
def __init__(self,
2020
connection,
2121
table_name=None,
22-
dataset_name=None,
22+
hhs_dataset_id=None,
2323
columns_and_types=None,
2424
key_columns=None,
2525
additional_fields=None):
@@ -31,6 +31,8 @@ def __init__(self,
3131
An open connection to a database.
3232
table_name : str
3333
The name of the table which holds the dataset.
34+
hhs_dataset_id : str
35+
The 9-character healthdata.gov identifier for this dataset.
3436
columns_and_types : tuple[str, str, Callable]
3537
List of 3-tuples of (CSV header name, SQL column name, data type) for
3638
all the columns in the CSV file.
@@ -41,7 +43,7 @@ def __init__(self,
4143

4244
self.connection = connection
4345
self.table_name = table_name
44-
self.dataset_name = dataset_name
46+
self.hhs_dataset_id = hhs_dataset_id
4547
self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \
4648
'publication_date'
4749
self.columns_and_types = {
@@ -117,8 +119,8 @@ def contains_revision(self, revision):
117119
FROM
118120
`covid_hosp_meta`
119121
WHERE
120-
`dataset_name` = %s AND `revision_timestamp` = %s
121-
''', (self.dataset_name, revision))
122+
`hhs_dataset_id` = %s AND `revision_timestamp` = %s
123+
''', (self.hhs_dataset_id, revision))
122124
for (result,) in cursor:
123125
return bool(result)
124126

@@ -140,14 +142,15 @@ def insert_metadata(self, publication_date, revision, meta_json):
140142
INSERT INTO
141143
`covid_hosp_meta` (
142144
`dataset_name`,
145+
`hhs_dataset_id`,
143146
`publication_date`,
144147
`revision_timestamp`,
145148
`metadata_json`,
146149
`acquisition_datetime`
147150
)
148151
VALUES
149-
(%s, %s, %s, %s, NOW())
150-
''', (self.dataset_name, publication_date, revision, meta_json))
152+
(%s, %s, %s, %s, %s, NOW())
153+
''', (self.table_name, self.hhs_dataset_id, publication_date, revision, meta_json))
151154

152155
def insert_dataset(self, publication_date, dataframe):
153156
"""Add a dataset to the database.
@@ -234,7 +237,7 @@ def get_max_issue(self):
234237
from
235238
`covid_hosp_meta`
236239
WHERE
237-
dataset_name = "{self.dataset_name}"
240+
hhs_dataset_id = "{self.hhs_dataset_id}"
238241
''')
239242
for (result,) in cursor:
240243
if result is not None:

src/acquisition/covid_hosp/facility/database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,6 @@ def __init__(self, *args, **kwargs):
214214
*args,
215215
**kwargs,
216216
table_name=Database.TABLE_NAME,
217-
dataset_name=Network.DATASET_ID,
217+
hhs_dataset_id=Network.DATASET_ID,
218218
key_columns=Database.KEY_COLS,
219219
columns_and_types=Database.ORDERED_CSV_COLUMNS)

src/acquisition/covid_hosp/state_daily/database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def __init__(self, *args, **kwargs):
224224
*args,
225225
**kwargs,
226226
table_name=Database.TABLE_NAME,
227-
dataset_name=Network.DATASET_ID,
227+
hhs_dataset_id=Network.DATASET_ID,
228228
columns_and_types=Database.ORDERED_CSV_COLUMNS,
229229
key_columns=Database.KEY_COLS,
230230
additional_fields=[Columndef('D', 'record_type', None)])

src/acquisition/covid_hosp/state_timeseries/database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def __init__(self, *args, **kwargs):
223223
*args,
224224
**kwargs,
225225
table_name=Database.TABLE_NAME,
226-
dataset_name=Network.DATASET_ID,
226+
hhs_dataset_id=Network.DATASET_ID,
227227
columns_and_types=Database.ORDERED_CSV_COLUMNS,
228228
key_columns=Database.KEY_COLS,
229229
additional_fields=[Columndef('T', 'record_type', None)])

src/ddl/covid_hosp.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ surfaced through the Epidata API.
4848
CREATE TABLE `covid_hosp_meta` (
4949
`id` INT NOT NULL AUTO_INCREMENT,
5050
`dataset_name` VARCHAR(64) NOT NULL,
51+
`hhs_dataset_id` CHAR(9) NOT NULL DEFAULT "????-????",
5152
`publication_date` INT NOT NULL,
5253
`revision_timestamp` VARCHAR(512) NOT NULL,
5354
`metadata_json` JSON NOT NULL,
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
UPDATE covid_hosp_meta SET dataset_name="g62h-syeh" WHERE revision_timestamp like "%g62h-syeh%";
2-
UPDATE covid_hosp_meta SET dataset_name="6xf2-c3ie" where revision_timestamp like "%6xf2-c3ie%";
3-
UPDATE covid_hosp_meta SET dataset_name="anag-cw7u" WHERE revision_timestamp LIKE "%anag-cw7u%";
1+
ALTER TABLE covid_hosp_meta ADD COLUMN hhs_dataset_id CHAR(9) NOT NULL DEFAULT "????-????";
2+
UPDATE covid_hosp_meta SET hhs_dataset_id="g62h-syeh" WHERE revision_timestamp LIKE "%g62h-syeh%";
3+
UPDATE covid_hosp_meta SET hhs_dataset_id="6xf2-c3ie" WHERE revision_timestamp LIKE "%6xf2-c3ie%";
4+
UPDATE covid_hosp_meta SET hhs_dataset_id="anag-cw7u" WHERE revision_timestamp LIKE "%anag-cw7u%";

tests/acquisition/covid_hosp/common/test_database.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_contains_revision(self):
6868

6969
mock_connection = MagicMock()
7070
mock_cursor = mock_connection.cursor()
71-
database = Database(mock_connection, table_name=sentinel.table_name, dataset_name=sentinel.dataset_name)
71+
database = Database(mock_connection, table_name=sentinel.table_name, hhs_dataset_id=sentinel.hhs_dataset_id)
7272

7373
with self.subTest(name='new revision'):
7474
mock_cursor.__iter__.return_value = [(0,)]
@@ -78,7 +78,7 @@ def test_contains_revision(self):
7878
# compare with boolean literal to test the type cast
7979
self.assertIs(result, False)
8080
query_values = mock_cursor.execute.call_args[0][-1]
81-
self.assertEqual(query_values, (sentinel.dataset_name, sentinel.revision))
81+
self.assertEqual(query_values, (sentinel.hhs_dataset_id, sentinel.revision))
8282

8383
with self.subTest(name='old revision'):
8484
mock_cursor.__iter__.return_value = [(1,)]
@@ -88,7 +88,7 @@ def test_contains_revision(self):
8888
# compare with boolean literal to test the type cast
8989
self.assertIs(result, True)
9090
query_values = mock_cursor.execute.call_args[0][-1]
91-
self.assertEqual(query_values, (sentinel.dataset_name, sentinel.revision))
91+
self.assertEqual(query_values, (sentinel.hhs_dataset_id, sentinel.revision))
9292

9393
def test_insert_metadata(self):
9494
"""Add new metadata to the database."""
@@ -98,7 +98,7 @@ def test_insert_metadata(self):
9898

9999
mock_connection = MagicMock()
100100
mock_cursor = mock_connection.cursor()
101-
database = Database(mock_connection, table_name=sentinel.table_name, dataset_name=sentinel.dataset_name)
101+
database = Database(mock_connection, table_name=sentinel.table_name, hhs_dataset_id=sentinel.hhs_dataset_id)
102102

103103
result = database.insert_metadata(
104104
sentinel.publication_date,
@@ -108,7 +108,8 @@ def test_insert_metadata(self):
108108
self.assertIsNone(result)
109109
actual_values = mock_cursor.execute.call_args[0][-1]
110110
expected_values = (
111-
sentinel.dataset_name,
111+
sentinel.table_name,
112+
sentinel.hhs_dataset_id,
112113
sentinel.publication_date,
113114
sentinel.revision,
114115
sentinel.meta_json,

0 commit comments

Comments
 (0)