diff --git a/integrations/acquisition/covidcast/test_covidcast_meta_caching.py b/integrations/acquisition/covidcast/test_covidcast_meta_caching.py index d9be18645..99008a0f1 100644 --- a/integrations/acquisition/covidcast/test_covidcast_meta_caching.py +++ b/integrations/acquisition/covidcast/test_covidcast_meta_caching.py @@ -40,9 +40,9 @@ def setUp(self): cur = cnx.cursor() # clear all tables - cur.execute("truncate table signal_load") - cur.execute("truncate table signal_history") - cur.execute("truncate table signal_latest") + cur.execute("truncate table epimetric_load") + cur.execute("truncate table epimetric_full") + cur.execute("truncate table epimetric_latest") cur.execute("truncate table geo_dim") cur.execute("truncate table signal_dim") # reset the `covidcast_meta_cache` table (it should always have one row) @@ -83,7 +83,7 @@ def test_caching(self): ''') self.cur.execute(f''' INSERT INTO - `signal_latest` (`signal_data_id`, `signal_key_id`, `geo_key_id`, `time_type`, + `epimetric_latest` (`epimetric_id`, `signal_key_id`, `geo_key_id`, `time_type`, `time_value`, `value_updated_timestamp`, `value`, `stderr`, `sample_size`, `issue`, `lag`, `missing_value`, diff --git a/integrations/acquisition/covidcast/test_csv_uploading.py b/integrations/acquisition/covidcast/test_csv_uploading.py index ecb1eb77b..f0bf99740 100644 --- a/integrations/acquisition/covidcast/test_csv_uploading.py +++ b/integrations/acquisition/covidcast/test_csv_uploading.py @@ -36,9 +36,9 @@ def setUp(self): cur = cnx.cursor() # clear all tables - cur.execute("truncate table signal_load") - cur.execute("truncate table signal_history") - cur.execute("truncate table signal_latest") + cur.execute("truncate table epimetric_load") + cur.execute("truncate table epimetric_full") + cur.execute("truncate table epimetric_latest") cur.execute("truncate table geo_dim") cur.execute("truncate table signal_dim") # reset the `covidcast_meta_cache` table (it should always have one row) @@ -78,9 +78,9 @@ def 
apply_lag(expected_epidata): def verify_timestamps_and_defaults(self): self.cur.execute(''' -select value_updated_timestamp from signal_history +select value_updated_timestamp from epimetric_full UNION ALL -select value_updated_timestamp from signal_latest''') +select value_updated_timestamp from epimetric_latest''') for (value_updated_timestamp,) in self.cur: self.assertGreater(value_updated_timestamp, 0) diff --git a/integrations/acquisition/covidcast/test_db.py b/integrations/acquisition/covidcast/test_db.py index 3371a3073..588551a67 100644 --- a/integrations/acquisition/covidcast/test_db.py +++ b/integrations/acquisition/covidcast/test_db.py @@ -15,7 +15,7 @@ def _find_matches_for_row(self, row): cols = "source signal time_type time_value geo_type geo_value issue".split() results = {} cur = self._db._cursor - for table in ['signal_latest_v', 'signal_history_v']: + for table in ['epimetric_latest_v', 'epimetric_full_v']: q = f"SELECT * FROM {table} WHERE " # NOTE: repr() puts str values in single quotes but simply 'string-ifies' numerics; # getattr() accesses members by string of their name @@ -34,9 +34,9 @@ def test_id_sync(self): # AUTOINCREMENT pk id from the load table. 
this test is intended to make sure that they # appropriately stay in sync with each other - pk_column = 'signal_data_id' - histor_view = 'signal_history_v' - latest_view = 'signal_latest_v' + pk_column = 'epimetric_id' + histor_view = 'epimetric_full_v' + latest_view = 'epimetric_latest_v' # add a data point base_row, _ = self._make_placeholder_row() diff --git a/integrations/acquisition/covidcast/test_delete_batch.py b/integrations/acquisition/covidcast/test_delete_batch.py index f59ec7b04..915c9341b 100644 --- a/integrations/acquisition/covidcast/test_delete_batch.py +++ b/integrations/acquisition/covidcast/test_delete_batch.py @@ -33,7 +33,7 @@ def setUp(self): self._db = Database() self._db.connect() - for table in "signal_load signal_latest signal_history geo_dim signal_dim".split(): + for table in "epimetric_load epimetric_latest epimetric_full geo_dim signal_dim".split(): self._db._cursor.execute(f"TRUNCATE TABLE {table}") diff --git a/integrations/server/test_covidcast_meta.py b/integrations/server/test_covidcast_meta.py index ec5f0e23e..d0aef6fe5 100644 --- a/integrations/server/test_covidcast_meta.py +++ b/integrations/server/test_covidcast_meta.py @@ -33,8 +33,8 @@ class CovidcastMetaTests(unittest.TestCase): } template = ''' - INSERT INTO `signal_latest` ( - `signal_data_id`, `signal_key_id`, `geo_key_id`, + INSERT INTO `epimetric_latest` ( + `epimetric_id`, `signal_key_id`, `geo_key_id`, `time_type`, `time_value`, `value_updated_timestamp`, `value`, `stderr`, `sample_size`, `issue`, `lag`, `missing_value`, @@ -59,9 +59,9 @@ def setUp(self): cur = cnx.cursor() # clear all tables - cur.execute("truncate table signal_load") - cur.execute("truncate table signal_history") - cur.execute("truncate table signal_latest") + cur.execute("truncate table epimetric_load") + cur.execute("truncate table epimetric_full") + cur.execute("truncate table epimetric_latest") cur.execute("truncate table geo_dim") cur.execute("truncate table signal_dim") # reset the 
`covidcast_meta_cache` table (it should always have one row) diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py index 9c370c0c6..b93514712 100644 --- a/src/acquisition/covidcast/database.py +++ b/src/acquisition/covidcast/database.py @@ -72,12 +72,12 @@ class Database: DATABASE_NAME = 'covid' - load_table = "signal_load" + load_table = "epimetric_load" # if you want to deal with foreign key ids: use table # if you want to deal with source/signal names, geo type/values, etc: use view - latest_table = "signal_latest" + latest_table = "epimetric_latest" latest_view = latest_table + "_v" - history_table = "signal_history" + history_table = "epimetric_full" history_view = history_table + "_v" # TODO: consider using class variables like this for dimension table names too # TODO: also consider that for composite key tuples, like short_comp_key and long_comp_key as used in delete_batch() @@ -123,20 +123,20 @@ def _reset_load_table_ai_counter(self): """Corrects the AUTO_INCREMENT counter in the load table. To be used in emergencies only, if the load table was accidentally TRUNCATEd. - This ensures any `signal_data_id`s generated by the load table will not collide with the history or latest tables. + This ensures any `epimetric_id`s generated by the load table will not collide with the history or latest tables. This is also destructive to any data in the load table. """ - self._cursor.execute(f'DELETE FROM signal_load') + self._cursor.execute(f'DELETE FROM epimetric_load') # NOTE: 'ones' are used as filler here for the (required) NOT NULL columns. 
self._cursor.execute(f""" - INSERT INTO signal_load - (signal_data_id, + INSERT INTO epimetric_load + (epimetric_id, source, `signal`, geo_type, geo_value, time_type, time_value, issue, `lag`, value_updated_timestamp) VALUES - ((SELECT 1+MAX(signal_data_id) FROM signal_history), + ((SELECT 1+MAX(epimetric_id) FROM epimetric_full), '1', '1', '1', '1', '1', 1, 1, 1, 1);""") - self._cursor.execute(f'DELETE FROM signal_load') + self._cursor.execute(f'DELETE FROM epimetric_load') def insert_or_update_bulk(self, cc_rows): return self.insert_or_update_batch(cc_rows) @@ -227,7 +227,7 @@ def insert_or_update_batch(self, cc_rows, batch_size=2**20, commit_partial=False def run_dbjobs(self): # we do this LEFT JOIN trick because mysql cant do set difference (aka EXCEPT or MINUS) - # (as in " select distinct source, signal from signal_dim minus select distinct source, signal from signal_load ") + # (as in " select distinct source, signal from signal_dim minus select distinct source, signal from epimetric_load ") signal_dim_add_new_load = f''' INSERT INTO signal_dim (`source`, `signal`) SELECT DISTINCT sl.source, sl.signal @@ -245,20 +245,20 @@ def run_dbjobs(self): WHERE gd.geo_type IS NULL ''' - signal_history_load = f''' + epimetric_full_load = f''' INSERT INTO {self.history_table} - (signal_data_id, signal_key_id, geo_key_id, issue, data_as_of_dt, + (epimetric_id, signal_key_id, geo_key_id, issue, data_as_of_dt, time_type, time_value, `value`, stderr, sample_size, `lag`, value_updated_timestamp, computation_as_of_dt, missing_value, missing_stderr, missing_sample_size) SELECT - signal_data_id, sd.signal_key_id, gd.geo_key_id, issue, data_as_of_dt, + epimetric_id, sd.signal_key_id, gd.geo_key_id, issue, data_as_of_dt, time_type, time_value, `value`, stderr, sample_size, `lag`, value_updated_timestamp, computation_as_of_dt, missing_value, missing_stderr, missing_sample_size FROM `{self.load_table}` sl INNER JOIN signal_dim sd USING (source, `signal`) INNER JOIN geo_dim gd USING 
(geo_type, geo_value) ON DUPLICATE KEY UPDATE - `signal_data_id` = sl.`signal_data_id`, + `epimetric_id` = sl.`epimetric_id`, `value_updated_timestamp` = sl.`value_updated_timestamp`, `value` = sl.`value`, `stderr` = sl.`stderr`, @@ -269,13 +269,13 @@ def run_dbjobs(self): `missing_sample_size` = sl.`missing_sample_size` ''' - signal_latest_load = f''' + epimetric_latest_load = f''' INSERT INTO {self.latest_table} - (signal_data_id, signal_key_id, geo_key_id, issue, data_as_of_dt, + (epimetric_id, signal_key_id, geo_key_id, issue, data_as_of_dt, time_type, time_value, `value`, stderr, sample_size, `lag`, value_updated_timestamp, computation_as_of_dt, missing_value, missing_stderr, missing_sample_size) SELECT - signal_data_id, sd.signal_key_id, gd.geo_key_id, issue, data_as_of_dt, + epimetric_id, sd.signal_key_id, gd.geo_key_id, issue, data_as_of_dt, time_type, time_value, `value`, stderr, sample_size, `lag`, value_updated_timestamp, computation_as_of_dt, missing_value, missing_stderr, missing_sample_size FROM `{self.load_table}` sl @@ -283,7 +283,7 @@ def run_dbjobs(self): INNER JOIN geo_dim gd USING (geo_type, geo_value) WHERE is_latest_issue = 1 ON DUPLICATE KEY UPDATE - `signal_data_id` = sl.`signal_data_id`, + `epimetric_id` = sl.`epimetric_id`, `value_updated_timestamp` = sl.`value_updated_timestamp`, `value` = sl.`value`, `stderr` = sl.`stderr`, @@ -296,7 +296,7 @@ def run_dbjobs(self): ''' # NOTE: DO NOT `TRUNCATE` THIS TABLE! doing so will ruin the AUTO_INCREMENT counter that the history and latest tables depend on... 
- signal_load_delete_processed = f''' + epimetric_load_delete_processed = f''' DELETE FROM `{self.load_table}` ''' @@ -313,17 +313,17 @@ def run_dbjobs(self): time_q.append(time.time()) logger.debug('geo_dim_add_new_load', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) - self._cursor.execute(signal_history_load) + self._cursor.execute(epimetric_full_load) time_q.append(time.time()) - logger.debug('signal_history_load', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) + logger.debug('epimetric_full_load', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) - self._cursor.execute(signal_latest_load) + self._cursor.execute(epimetric_latest_load) time_q.append(time.time()) - logger.debug('signal_latest_load', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) + logger.debug('epimetric_latest_load', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) - self._cursor.execute(signal_load_delete_processed) + self._cursor.execute(epimetric_load_delete_processed) time_q.append(time.time()) - logger.debug('signal_load_delete_processed', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) + logger.debug('epimetric_load_delete_processed', rows=self._cursor.rowcount, elapsed=time_q[-1]-time_q[-2]) except Exception as e: raise e @@ -395,22 +395,22 @@ def delete_batch(self, cc_deletions): add_history_id_sql = f''' UPDATE {tmp_table_name} d INNER JOIN {self.history_view} h USING ({long_comp_key}) -SET d.delete_history_id=h.signal_data_id; +SET d.delete_history_id=h.epimetric_id; ''' # if a row we are deleting also appears in the 'latest' table (with a matching 'issue')... 
mark_for_update_latest_sql = f''' UPDATE {tmp_table_name} d INNER JOIN {self.latest_view} ell USING ({long_comp_key}) -SET d.update_latest=1, d.delete_latest_id=ell.signal_data_id; +SET d.update_latest=1, d.delete_latest_id=ell.epimetric_id; ''' delete_history_sql = f''' -DELETE h FROM {tmp_table_name} d INNER JOIN {self.history_table} h ON d.delete_history_id=h.signal_data_id; +DELETE h FROM {tmp_table_name} d INNER JOIN {self.history_table} h ON d.delete_history_id=h.epimetric_id; ''' # ...remove it from 'latest'... delete_latest_sql = f''' -DELETE ell FROM {tmp_table_name} d INNER JOIN {self.latest_table} ell ON d.delete_latest_id=ell.signal_data_id; +DELETE ell FROM {tmp_table_name} d INNER JOIN {self.latest_table} ell ON d.delete_latest_id=ell.epimetric_id; ''' # ...and re-write that record with its next-latest issue (from 'history') instead. @@ -418,12 +418,12 @@ def delete_batch(self, cc_deletions): # AND also after `delete_latest_sql` so that we dont get a key collision on insert. 
update_latest_sql = f''' INSERT INTO {self.latest_table} - (signal_data_id, + (epimetric_id, signal_key_id, geo_key_id, time_type, time_value, issue, value, stderr, sample_size, `lag`, value_updated_timestamp, missing_value, missing_stderr, missing_sample_size) SELECT - h.signal_data_id, + h.epimetric_id, h.signal_key_id, h.geo_key_id, h.time_type, h.time_value, h.issue, h.value, h.stderr, h.sample_size, h.`lag`, h.value_updated_timestamp, h.missing_value, h.missing_stderr, h.missing_sample_size diff --git a/src/acquisition/covidcast/migrate_epidata_to_v4.py b/src/acquisition/covidcast/migrate_epidata_to_v4.py index 3ce24e9a5..a4afafc11 100644 --- a/src/acquisition/covidcast/migrate_epidata_to_v4.py +++ b/src/acquisition/covidcast/migrate_epidata_to_v4.py @@ -65,9 +65,9 @@ def start_tx(cursor): cursor.execute('SET autocommit=0;') # starts a transaction as suggested in https://dev.mysql.com/doc/refman/8.0/en/lock-tables.html # NOTE: locks must be specified for any aliases of table names that are used cursor.execute('''LOCK TABLES epidata.covidcast AS cc READ, - signal_load WRITE, signal_load AS sl WRITE, - signal_history WRITE, - signal_latest WRITE, + epimetric_load WRITE, epimetric_load AS sl WRITE, + epimetric_full WRITE, + epimetric_latest WRITE, signal_dim WRITE, signal_dim AS sd READ, geo_dim WRITE, geo_dim AS gd READ;''') cursor.execute('SET unique_checks=0;') @@ -87,7 +87,7 @@ def do_batches(db, start, upper_lim, batch_size): # NOTE: first rows of column names are identical, second rows are for specifying a rename and a literal batch_sql = f""" - INSERT INTO signal_load ( + INSERT INTO epimetric_load ( `issue`, `source`, `signal`, geo_type, geo_value, time_type, time_value, `value`, stderr, sample_size, `lag`, value_updated_timestamp, is_latest_issue, missing_value, missing_stderr, missing_sample_size ) SELECT `issue`, `source`, `signal`, geo_type, geo_value, time_type, time_value, `value`, stderr, sample_size, `lag`, value_updated_timestamp, 
is_latest_issue, missing_value, missing_stderr, missing_sample_size @@ -150,7 +150,7 @@ def main(destination_schema, batch_size, start_id, upper_lim_override): if start_id==0: # clear tables in the v4 schema print("truncating tables...") - for table in "signal_load signal_latest signal_history geo_dim signal_dim".split(): + for table in "epimetric_load epimetric_latest epimetric_full geo_dim signal_dim".split(): db._cursor.execute(f"TRUNCATE TABLE {table}") db.commit() start_id = 1 @@ -160,12 +160,12 @@ def main(destination_schema, batch_size, start_id, upper_lim_override): # get table counts [the quick and dirty way] print("-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-") - db._cursor.execute(f"SELECT MAX(signal_data_id) FROM signal_history;") + db._cursor.execute(f"SELECT MAX(epimetric_id) FROM epimetric_full;") for (max_id,) in db._cursor: - print(f"signal_history: {max_id}") - db._cursor.execute(f"SELECT MAX(signal_data_id) FROM signal_latest;") + print(f"epimetric_full: {max_id}") + db._cursor.execute(f"SELECT MAX(epimetric_id) FROM epimetric_latest;") for (max_id,) in db._cursor: - print(f"signal_latest: {max_id} (this should be <= the number above)") + print(f"epimetric_latest: {max_id} (this should be <= the number above)") db._cursor.execute(f"SELECT COUNT(signal_key_id), MAX(signal_key_id) FROM signal_dim;") for (count_id, max_id) in db._cursor: print(f"signal_dim: count {count_id} / max {max_id}") diff --git a/src/acquisition/covidcast/test_utils.py b/src/acquisition/covidcast/test_utils.py index 86c8af8ca..181dfac68 100644 --- a/src/acquisition/covidcast/test_utils.py +++ b/src/acquisition/covidcast/test_utils.py @@ -17,7 +17,7 @@ def setUp(self): self._db.connect() # empty all of the data tables - for table in "signal_load signal_latest signal_history geo_dim signal_dim".split(): + for table in "epimetric_load epimetric_latest epimetric_full geo_dim signal_dim".split(): self._db._cursor.execute(f"TRUNCATE TABLE {table};") self.localSetUp() 
self._db._connection.commit() diff --git a/src/ddl/migrations/v4_renaming.sql b/src/ddl/migrations/v4_renaming.sql new file mode 100644 index 000000000..9bfc7a145 --- /dev/null +++ b/src/ddl/migrations/v4_renaming.sql @@ -0,0 +1,97 @@ +-- drop VIEWs in `epidata` that act as aliases to (ie, they reference) VIEWs in `covid` +USE epidata; +DROP VIEW + signal_history_v, + signal_latest_v; + +-- return to v4 schema namespace +USE covid; + +-- drop VIEWs that reference main TABLEs +DROP VIEW + signal_history_v, + signal_latest_v; + +-- rename main TABLEs +RENAME TABLE + signal_history TO epimetric_full, + signal_latest TO epimetric_latest, + signal_load TO epimetric_load; + +-- rename id COLUMNs in main TABLEs +ALTER TABLE epimetric_full RENAME COLUMN signal_data_id TO epimetric_id; +ALTER TABLE epimetric_latest RENAME COLUMN signal_data_id TO epimetric_id; +ALTER TABLE epimetric_load RENAME COLUMN signal_data_id TO epimetric_id; + +-- -- -- TODO: rename `value_key_*` INDEXes in `epimetric_*` TABLEs to `???_idx_*`? + +-- re-create VIEWs that reference newly renamed TABLEs (this is a straight copy of the VIEW definitions from ../v4_schema.sql +CREATE OR REPLACE VIEW epimetric_full_v AS + SELECT + 0 AS `is_latest_issue`, -- provides column-compatibility to match `covidcast` table + -- ^ this value is essentially undefined in this view, the notion of a 'latest' issue is not encoded here and must be drawn from the 'latest' table or view or otherwise computed... 
+ NULL AS `direction`, -- provides column-compatibility to match `covidcast` table + `t2`.`source` AS `source`, + `t2`.`signal` AS `signal`, + `t3`.`geo_type` AS `geo_type`, + `t3`.`geo_value` AS `geo_value`, + `t1`.`epimetric_id` AS `epimetric_id`, + `t1`.`strat_key_id` AS `strat_key_id`, -- TODO: for future use + `t1`.`issue` AS `issue`, + `t1`.`data_as_of_dt` AS `data_as_of_dt`, -- TODO: for future use ; also "as_of" is problematic and should be renamed + `t1`.`time_type` AS `time_type`, + `t1`.`time_value` AS `time_value`, + `t1`.`reference_dt` AS `reference_dt`, -- TODO: for future use + `t1`.`value` AS `value`, + `t1`.`stderr` AS `stderr`, + `t1`.`sample_size` AS `sample_size`, + `t1`.`lag` AS `lag`, + `t1`.`value_updated_timestamp` AS `value_updated_timestamp`, + `t1`.`computation_as_of_dt` AS `computation_as_of_dt`, -- TODO: for future use ; also "as_of" is problematic and should be renamed + `t1`.`missing_value` AS `missing_value`, + `t1`.`missing_stderr` AS `missing_stderr`, + `t1`.`missing_sample_size` AS `missing_sample_size`, + `t1`.`signal_key_id` AS `signal_key_id`, + `t1`.`geo_key_id` AS `geo_key_id` + FROM `epimetric_full` `t1` + JOIN `signal_dim` `t2` + ON `t1`.`signal_key_id` = `t2`.`signal_key_id` + JOIN `geo_dim` `t3` + ON `t1`.`geo_key_id` = `t3`.`geo_key_id`; +CREATE OR REPLACE VIEW epimetric_latest_v AS + SELECT + 1 AS `is_latest_issue`, -- provides column-compatibility to match `covidcast` table + NULL AS `direction`, -- provides column-compatibility to match `covidcast` table + `t2`.`source` AS `source`, + `t2`.`signal` AS `signal`, + `t3`.`geo_type` AS `geo_type`, + `t3`.`geo_value` AS `geo_value`, + `t1`.`epimetric_id` AS `epimetric_id`, + `t1`.`strat_key_id` AS `strat_key_id`, -- TODO: for future use + `t1`.`issue` AS `issue`, + `t1`.`data_as_of_dt` AS `data_as_of_dt`, -- TODO: for future use ; also "as_of" is problematic and should be renamed + `t1`.`time_type` AS `time_type`, + `t1`.`time_value` AS `time_value`, + `t1`.`reference_dt` 
AS `reference_dt`, -- TODO: for future use + `t1`.`value` AS `value`, + `t1`.`stderr` AS `stderr`, + `t1`.`sample_size` AS `sample_size`, + `t1`.`lag` AS `lag`, + `t1`.`value_updated_timestamp` AS `value_updated_timestamp`, + `t1`.`computation_as_of_dt` AS `computation_as_of_dt`, -- TODO: for future use ; also "as_of" is problematic and should be renamed + `t1`.`missing_value` AS `missing_value`, + `t1`.`missing_stderr` AS `missing_stderr`, + `t1`.`missing_sample_size` AS `missing_sample_size`, + `t1`.`signal_key_id` AS `signal_key_id`, + `t1`.`geo_key_id` AS `geo_key_id` + FROM `epimetric_latest` `t1` + JOIN `signal_dim` `t2` + ON `t1`.`signal_key_id` = `t2`.`signal_key_id` + JOIN `geo_dim` `t3` + ON `t1`.`geo_key_id` = `t3`.`geo_key_id`; + + +-- re-create `epidata` alias VIEWs +USE epidata; +CREATE VIEW epidata.epimetric_full_v AS SELECT * FROM covid.epimetric_full_v; +CREATE VIEW epidata.epimetric_latest_v AS SELECT * FROM covid.epimetric_latest_v; diff --git a/src/ddl/v4_schema.sql b/src/ddl/v4_schema.sql index cf65e0834..cc4f8294e 100644 --- a/src/ddl/v4_schema.sql +++ b/src/ddl/v4_schema.sql @@ -24,8 +24,8 @@ CREATE TABLE strat_dim ( ) ENGINE=InnoDB; INSERT INTO strat_dim VALUES (1, 'NO_STRATIFICATION', ''); -CREATE TABLE signal_history ( - `signal_data_id` BIGINT(20) UNSIGNED NOT NULL PRIMARY KEY, +CREATE TABLE epimetric_full ( + `epimetric_id` BIGINT(20) UNSIGNED NOT NULL PRIMARY KEY, `signal_key_id` BIGINT(20) UNSIGNED NOT NULL, `geo_key_id` BIGINT(20) UNSIGNED NOT NULL, `strat_key_id` BIGINT(20) UNSIGNED NOT NULL DEFAULT 1, -- TODO: for future use @@ -52,23 +52,23 @@ CREATE TABLE signal_history ( UNIQUE INDEX `value_key_gti` (`signal_key_id`, `geo_key_id`, `time_type`, `time_value`, `issue`) ) ENGINE=InnoDB; -CREATE TABLE signal_latest ( - PRIMARY KEY (`signal_data_id`), +CREATE TABLE epimetric_latest ( + PRIMARY KEY (`epimetric_id`), UNIQUE INDEX `value_key_tg` (`signal_key_id`, `time_type`, `time_value`, `geo_key_id`), UNIQUE INDEX `value_key_gt` 
(`signal_key_id`, `geo_key_id`, `time_type`, `time_value`) ) ENGINE=InnoDB -SELECT * FROM signal_history; +SELECT * FROM epimetric_full; -- NOTE: In production or any non-testing system that should maintain consistency, -- **DO NOT** 'TRUNCATE' this table. --- Doing so will function as a DROP/CREATE and reset the AUTO_INCREMENT counter for the `signal_data_id` field. --- This field is used to populate the non-AUTO_INCREMENT fields of the same name in `signal_latest` and `signal_history`, +-- Doing so will function as a DROP/CREATE and reset the AUTO_INCREMENT counter for the `epimetric_id` field. +-- This field is used to populate the non-AUTO_INCREMENT fields of the same name in `epimetric_latest` and `epimetric_full`, -- and resetting it will ultimately cause PK collisions. --- To restore the counter, a row must be written with a `signal_data_id` value greater than the maximum +-- To restore the counter, a row must be written with an `epimetric_id` value greater than the maximum -- of its values in the other tables. -CREATE TABLE signal_load ( - `signal_data_id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, +CREATE TABLE epimetric_load ( + `epimetric_id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, `signal_key_id` BIGINT(20) UNSIGNED, `geo_key_id` BIGINT(20) UNSIGNED, `strat_key_id` BIGINT(20) UNSIGNED NOT NULL DEFAULT 1, -- TODO: for future use @@ -96,7 +96,7 @@ CREATE TABLE signal_load ( ) ENGINE=InnoDB; -CREATE OR REPLACE VIEW signal_history_v AS +CREATE OR REPLACE VIEW epimetric_full_v AS SELECT 0 AS `is_latest_issue`, -- provides column-compatibility to match `covidcast` table -- ^ this value is essentially undefined in this view, the notion of a 'latest' issue is not encoded here and must be drawn from the 'latest' table or view or otherwise computed... 
@@ -105,7 +105,7 @@ CREATE OR REPLACE VIEW signal_history_v AS `t2`.`signal` AS `signal`, `t3`.`geo_type` AS `geo_type`, `t3`.`geo_value` AS `geo_value`, - `t1`.`signal_data_id` AS `signal_data_id`, + `t1`.`epimetric_id` AS `epimetric_id`, `t1`.`strat_key_id` AS `strat_key_id`, -- TODO: for future use `t1`.`issue` AS `issue`, `t1`.`data_as_of_dt` AS `data_as_of_dt`, -- TODO: for future use ; also "as_of" is problematic and should be renamed @@ -123,13 +123,13 @@ CREATE OR REPLACE VIEW signal_history_v AS `t1`.`missing_sample_size` AS `missing_sample_size`, `t1`.`signal_key_id` AS `signal_key_id`, `t1`.`geo_key_id` AS `geo_key_id` - FROM `signal_history` `t1` + FROM `epimetric_full` `t1` JOIN `signal_dim` `t2` ON `t1`.`signal_key_id` = `t2`.`signal_key_id` JOIN `geo_dim` `t3` ON `t1`.`geo_key_id` = `t3`.`geo_key_id`; -CREATE OR REPLACE VIEW signal_latest_v AS +CREATE OR REPLACE VIEW epimetric_latest_v AS SELECT 1 AS `is_latest_issue`, -- provides column-compatibility to match `covidcast` table NULL AS `direction`, -- provides column-compatibility to match `covidcast` table @@ -137,7 +137,7 @@ CREATE OR REPLACE VIEW signal_latest_v AS `t2`.`signal` AS `signal`, `t3`.`geo_type` AS `geo_type`, `t3`.`geo_value` AS `geo_value`, - `t1`.`signal_data_id` AS `signal_data_id`, + `t1`.`epimetric_id` AS `epimetric_id`, `t1`.`strat_key_id` AS `strat_key_id`, -- TODO: for future use `t1`.`issue` AS `issue`, `t1`.`data_as_of_dt` AS `data_as_of_dt`, -- TODO: for future use ; also "as_of" is problematic and should be renamed @@ -155,7 +155,7 @@ CREATE OR REPLACE VIEW signal_latest_v AS `t1`.`missing_sample_size` AS `missing_sample_size`, `t1`.`signal_key_id` AS `signal_key_id`, `t1`.`geo_key_id` AS `geo_key_id` - FROM `signal_latest` `t1` + FROM `epimetric_latest` `t1` JOIN `signal_dim` `t2` ON `t1`.`signal_key_id` = `t2`.`signal_key_id` JOIN `geo_dim` `t3` diff --git a/src/ddl/v4_schema_aliases.sql b/src/ddl/v4_schema_aliases.sql index 838facc53..f5c6340e9 100644 --- 
a/src/ddl/v4_schema_aliases.sql +++ b/src/ddl/v4_schema_aliases.sql @@ -5,6 +5,6 @@ -- frontend api code still uses `epidata` but has these relevant tables/views "aliased" to use covid.blah when referred to as epidata.blah in context. -- ---------------------------------- -CREATE VIEW `epidata`.`signal_history_v` AS SELECT * FROM `covid`.`signal_history_v`; -CREATE VIEW `epidata`.`signal_latest_v` AS SELECT * FROM `covid`.`signal_latest_v`; +CREATE VIEW `epidata`.`epimetric_full_v` AS SELECT * FROM `covid`.`epimetric_full_v`; +CREATE VIEW `epidata`.`epimetric_latest_v` AS SELECT * FROM `covid`.`epimetric_latest_v`; CREATE VIEW `epidata`.`covidcast_meta_cache` AS SELECT * FROM `covid`.`covidcast_meta_cache`; diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index 3def48ca3..4a636d891 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -42,8 +42,8 @@ bp = Blueprint("covidcast", __name__) alias = None -latest_table = "signal_latest_v" -history_table = "signal_history_v" +latest_table = "epimetric_latest_v" +history_table = "epimetric_full_v" def parse_source_signal_pairs() -> List[SourceSignalPair]: ds = request.values.get("data_source")