From c7e45c1a23873cddfeaf396742c8c7ac1240d8d2 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:03 -0700 Subject: [PATCH 01/11] feat(afhsb): remove afhsb acquisition code --- src/acquisition/afhsb/afhsb_csv.py | 351 -------------------------- src/acquisition/afhsb/afhsb_sql.py | 194 -------------- src/acquisition/afhsb/afhsb_update.py | 39 --- 3 files changed, 584 deletions(-) delete mode 100644 src/acquisition/afhsb/afhsb_csv.py delete mode 100644 src/acquisition/afhsb/afhsb_sql.py delete mode 100644 src/acquisition/afhsb/afhsb_update.py diff --git a/src/acquisition/afhsb/afhsb_csv.py b/src/acquisition/afhsb/afhsb_csv.py deleted file mode 100644 index b839c4053..000000000 --- a/src/acquisition/afhsb/afhsb_csv.py +++ /dev/null @@ -1,351 +0,0 @@ -''' -afhsb_csv.py creates CSV files filled_00to13.csv, filled_13to17.csv and simple_DMISID_FY2018.csv -which will be later used to create MYSQL data tables. - -Several intermediate files will be created, including: -00to13.pickle 13to17.pickle 00to13.csv 13to17.csv - -Required source files: -ili_1_2000_5_2013_new.sas7bdat and ili_1_2013_11_2017_new.sas7bdat under SOURCE_DIR -country_codes.csv and DMISID_FY2018.csv under TARGET_DIR -All intermediate files and final csv files will be stored in TARGET_DIR -''' - -import csv -import os - -import pickle -import sas7bdat -import epiweeks as epi - - -DATAPATH = '/home/automation/afhsb_data' -SOURCE_DIR = DATAPATH -TARGET_DIR = DATAPATH - -INVALID_DMISIDS = set() - -def get_flu_cat(dx): - # flu1 (influenza) - if len(dx) == 0: - return None - dx = dx.capitalize() - if dx.isnumeric(): - for prefix in ["487", "488"]: - if dx.startswith(prefix): - return 1 - for i in range(0, 7): - prefix = str(480 + i) - if dx.startswith(prefix): - return 2 - for i in range(0, 7): - prefix = str(460 + i) - if dx.startswith(prefix): - return 3 - for prefix in ["07999", "3829", "7806", "7862"]: - if dx.startswith(prefix): - return 3 - elif (dx[0].isalpha() and dx[1:].isnumeric()): - for prefix in ["J09", "J10", "J11"]: - if dx.startswith(prefix): - return 1 - for i in range(12, 19): - prefix = "J{}".format(i) - if dx.startswith(prefix): - return 2 - for i in range(0, 7): - prefix = "J0{}".format(i) - if dx.startswith(prefix): - return 3 - for i in range(20, 23): - prefix = "J{}".format(i) - if dx.startswith(prefix): - return 3 - for prefix in ["J40", "R05", "H669", "R509", "B9789"]: - if dx.startswith(prefix): - return 3 - else: - return None - -def aggregate_data(sourcefile, targetfile): - reader = sas7bdat.SAS7BDAT(os.path.join(SOURCE_DIR, sourcefile), skip_header=True) - # map column names to column indices - col_2_idx = {column.name.decode('utf-8'): column.col_id for column in reader.columns} - - def get_field(row, column): - return row[col_2_idx[column]] - - def row2flu(row): - for i in range(1, 9): - dx = get_field(row, "dx{}".format(i)) - flu_cat = get_flu_cat(dx) - if flu_cat is not None: - return flu_cat - return 0 - - def row2epiweek(row): - date = get_field(row, 'd_event') - year, month, day = date.year, date.month, date.day - week_tuple = epi.Week.fromdate(year, month, day).weektuple() - year, week_num = week_tuple[0], week_tuple[1] - return year, week_num - - results_dict = {} - for _, row in enumerate(reader): - # if (r >= 1000000): break - if get_field(row, 'type') != "Outpt": - continue - year, week_num = row2epiweek(row) - dmisid = get_field(row, 'DMISID') - flu_cat = row2flu(row) - - key_list = [year, week_num, dmisid, flu_cat] - curr_dict = results_dict - for i, key in 
enumerate(key_list): - if i == len(key_list) - 1: - if key not in curr_dict: - curr_dict[key] = 0 - curr_dict[key] += 1 - else: - if key not in curr_dict: - curr_dict[key] = {} - curr_dict = curr_dict[key] - - results_path = os.path.join(TARGET_DIR, targetfile) - with open(results_path, 'wb') as f: - pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL) - - -################# Functions for geographical information #################### - -def get_country_mapping(): - filename = "country_codes.csv" - mapping = dict() - with open(os.path.join(TARGET_DIR, filename), "r") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - print(row.keys()) - alpha2 = row['alpha-2'] - alpha3 = row['alpha-3'] - mapping[alpha2] = alpha3 - - return mapping - -def format_dmisid_csv(filename, target_name): - src_path = os.path.join(TARGET_DIR, "{}.csv".format(filename)) - dst_path = os.path.join(TARGET_DIR, target_name) - - src_csv = open(src_path, "r", encoding='utf-8-sig') - reader = csv.DictReader(src_csv) - - dst_csv = open(dst_path, "w") - fieldnames = ['dmisid', 'country', 'state', 'zip5'] - writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) - writer.writeheader() - - country_mapping = get_country_mapping() - - for row in reader: - country2 = row['Facility ISO Country Code'] - if country2 == "": - country3 = "" - elif country2 not in country_mapping: - for key in row.keys(): - print(key, row[key]) - continue - else: - country3 = country_mapping[country2] - new_row = {'dmisid': row['DMIS ID'], - 'country': country3, - 'state': row['Facility State Code'], - 'zip5': row['Facility 5-Digit ZIP Code']} - writer.writerow(new_row) - -def dmisid(): - filename = 'DMISID_FY2018' - target_name = "simple_DMISID_FY2018.csv" - format_dmisid_csv(filename, target_name) - - -cen2states = {'cen1': {'CT', 'ME', 'MA', 'NH', 'RI', 'VT'}, - 'cen2': {'NJ', 'NY', 'PA'}, - 'cen3': {'IL', 'IN', 'MI', 'OH', 'WI'}, - 'cen4': {'IA', 'KS', 'MN', 'MO', 'NE', 'ND', 'SD'}, - 'cen5': {'DE', 'DC', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'WV'}, - 'cen6': {'AL', 'KY', 'MS', 'TN'}, - 'cen7': {'AR', 'LA', 'OK', 'TX'}, - 'cen8': {'AZ', 'CO', 'ID', 'MT', 'NV', 'NM', 'UT', 'WY'}, - 'cen9': {'AK', 'CA', 'HI', 'OR', 'WA'}} - -hhs2states = {'hhs1': {'VT', 'CT', 'ME', 'MA', 'NH', 'RI'}, - 'hhs2': {'NJ', 'NY'}, - 'hhs3': {'DE', 'DC', 'MD', 'PA', 'VA', 'WV'}, - 'hhs4': {'AL', 'FL', 'GA', 'KY', 'MS', 'NC', 'TN', 'SC'}, - 'hhs5': {'IL', 'IN', 'MI', 'MN', 'OH', 'WI'}, - 'hhs6': {'AR', 'LA', 'NM', 'OK', 'TX'}, - 'hhs7': {'IA', 'KS', 'MO', 'NE'}, - 'hhs8': {'CO', 'MT', 'ND', 'SD', 'UT', 'WY'}, - 'hhs9': {'AZ', 'CA', 'HI', 'NV'}, - 'hhs10': {'AK', 'ID', 'OR', 'WA'}} - -def state2region(D): - results = dict() - for region in D.keys(): - states = D[region] - for state in states: - assert state not in results - results[state] = region - return results - -def state2region_csv(): - to_hhs = state2region(hhs2states) - to_cen = state2region(cen2states) - states = to_hhs.keys() - target_name = "state2region.csv" - fieldnames = ['state', 'hhs', 'cen'] - with open(target_name, "w") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for state in states: - content = {"state": state, "hhs": to_hhs[state], "cen": to_cen[state]} - writer.writerow(content) - -################# Functions for geographical information #################### - -######################### Functions for AFHSB data ########################## - -def write_afhsb_csv(period): - flu_mapping = {0: "ili-flu3", 1: "flu1", 2:"flu2-flu1", 3: 
"flu3-flu2"} - results_dict = pickle.load(open(os.path.join(TARGET_DIR, "{}.pickle".format(period)), 'rb')) - - fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] - with open(os.path.join(TARGET_DIR, "{}.csv".format(period)), 'w') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - - i = 0 - for year in sorted(results_dict.keys()): - year_dict = results_dict[year] - for week in sorted(year_dict.keys()): - week_dict = year_dict[week] - for dmisid in sorted(week_dict.keys()): - dmisid_dict = week_dict[dmisid] - for flu in sorted(dmisid_dict.keys()): - visit_sum = dmisid_dict[flu] - i += 1 - epiweek = int("{}{:02d}".format(year, week)) - flu_type = flu_mapping[flu] - - row = {"epiweek": epiweek, "dmisid": None if (not dmisid.isnumeric()) else dmisid, - "flu_type": flu_type, "visit_sum": visit_sum, "id": i} - writer.writerow(row) - if i % 100000 == 0: - print(row) - -def dmisid_start_time_from_file(filename): - starttime_record = dict() - with open(filename, 'r') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - dmisid = row['dmisid'] - epiweek = int(row['epiweek']) - if dmisid not in starttime_record: - starttime_record[dmisid] = epiweek - else: - starttime_record[dmisid] = min(epiweek, starttime_record[dmisid]) - return starttime_record - -def dmisid_start_time(): - record1 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "00to13.csv")) - record2 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "13to17.csv")) - record = record1 - for dmisid, epiweek in record2.items(): - if dmisid in record: - record[dmisid] = min(record[dmisid], epiweek) - else: - record[dmisid] = epiweek - return record - -def fillin_zero_to_csv(period, dmisid_start_record): - src_path = os.path.join(TARGET_DIR, "{}.csv".format(period)) - dst_path = os.path.join(TARGET_DIR, "filled_{}.csv".format(period)) - - # Load data into a dictionary - src_csv = open(src_path, "r") - reader = csv.DictReader(src_csv) - - results_dict = dict() # epiweek -> dmisid -> flu_type: visit_sum - for i, row in enumerate(reader): - epiweek = int(row['epiweek']) - dmisid = row['dmisid'] - flu_type = row['flu_type'] - visit_sum = row['visit_sum'] - if epiweek not in results_dict: - results_dict[epiweek] = dict() - week_dict = results_dict[epiweek] - if dmisid not in week_dict: - week_dict[dmisid] = dict() - dmisid_dict = week_dict[dmisid] - dmisid_dict[flu_type] = visit_sum - - # Fill in zero count records - dmisid_group = dmisid_start_record.keys() - flutype_group = ["ili-flu3", "flu1", "flu2-flu1", "flu3-flu2"] - - for epiweek in results_dict.keys(): - week_dict = results_dict[epiweek] - for dmisid in dmisid_group: - start_week = dmisid_start_record[dmisid] - if start_week > epiweek: - continue - - if dmisid not in week_dict: - week_dict[dmisid] = dict() - - dmisid_dict = week_dict[dmisid] - for flutype in flutype_group: - if flutype not in dmisid_dict: - dmisid_dict[flutype] = 0 - - # Write to csv files - dst_csv = open(dst_path, "w") - fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] - writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) - writer.writeheader() - - i = 1 - for epiweek in results_dict: - for dmisid in results_dict[epiweek]: - for flutype in results_dict[epiweek][dmisid]: - visit_sum = results_dict[epiweek][dmisid][flutype] - row = {"id": i, "epiweek": epiweek, "dmisid": dmisid, - "flu_type": flutype, "visit_sum": visit_sum} - writer.writerow(row) - if i % 100000 == 0: - print(row) - i += 1 - print("Wrote {} 
rows".format(i)) - -######################### Functions for AFHSB data ########################## - -def main(): - # Build tables containing geographical information - state2region_csv() - dmisid() - - # Aggregate raw data into pickle files - aggregate_data("ili_1_2000_5_2013_new.sas7bdat", "00to13.pickle") - aggregate_data("ili_1_2013_11_2017_new.sas7bdat", "13to17.pickle") - - # write pickle content to csv files - write_afhsb_csv("00to13") - write_afhsb_csv("13to17") - - # Fill in zero count records - dmisid_start_record = dmisid_start_time() - fillin_zero_to_csv("00to13", dmisid_start_record) - fillin_zero_to_csv("13to17", dmisid_start_record) - - -if __name__ == '__main__': - main() diff --git a/src/acquisition/afhsb/afhsb_sql.py b/src/acquisition/afhsb/afhsb_sql.py deleted file mode 100644 index 278f3fc38..000000000 --- a/src/acquisition/afhsb/afhsb_sql.py +++ /dev/null @@ -1,194 +0,0 @@ -# standard library -import os - -# third party -import mysql.connector as connector - -# first party -import delphi.operations.secrets as secrets - - -def init_dmisid_table(sourcefile): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - table_name = 'dmisid_table' - create_table_cmd = ''' - CREATE TABLE `{}` ( - `dmisid` INT(4) NOT NULL PRIMARY KEY, - `country` CHAR(3) NULL, - `state` CHAR(2) NULL - ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\r\n' - IGNORE 1 ROWS - (@dmisid, @country, @state, @zip5) - SET - dmisid = @dmisid, - country = nullif(@country, ''), - state = nullif(@state, '') - ; - '''.format(sourcefile, table_name) - try: - cursor = cnx.cursor() - cursor.execute(create_table_cmd) - cursor.execute(populate_table_cmd) - cnx.commit() - finally: - cnx.close() - -def init_region_table(sourcefile): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - table_name = 'state2region_table' - create_table_cmd = ''' - CREATE TABLE `{}` ( - `state` CHAR(2) NOT NULL PRIMARY KEY, - `hhs` CHAR(5) NOT NULL, - `cen` CHAR(4) NOT NULL - ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\r\n' - IGNORE 1 ROWS - (@state, @hhs, @cen) - SET state=@state, hhs=@hhs, cen=@cen; - '''.format(sourcefile, table_name) - try: - cursor = cnx.cursor() - cursor.execute(create_table_cmd) - cursor.execute(populate_table_cmd) - cnx.commit() - finally: - cnx.close() - - -def init_raw_data(table_name, sourcefile): - print("Initialize {}".format(table_name)) - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - create_table_cmd = ''' - CREATE TABLE IF NOT EXISTS `{}` ( - `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, - `epiweek` INT(6) NOT NULL, - `dmisid` CHAR(4) NULL, - `flu_type` CHAR(9) NOT NULL, - `visit_sum` INT(11) NOT NULL, - - KEY `epiweek` (`epiweek`), - KEY `dmisid` (`dmisid`), - KEY `flu_type` (`flu_type`) - ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\r\n' - IGNORE 1 ROWS - (@id, @epiweek, @dmisid, @flu, @visits) - SET - id = @id, - epiweek = @epiweek, - dmisid = nullif(@dmisid, 'ZZZZ'), - flu_type = @flu, - visit_sum = @visits - ; - '''.format(sourcefile, table_name) - try: - cursor = cnx.cursor() - cursor.execute(create_table_cmd) - 
cursor.execute(populate_table_cmd) - cnx.commit() - finally: - cnx.close() - -def agg_by_state(src_table, dest_table): - print("Aggregating records by states...") - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - cmd = ''' - CREATE TABLE {} - SELECT a.epiweek, a.flu_type, d.state, d.country, sum(a.visit_sum) visit_sum - FROM {} a - LEFT JOIN dmisid_table d - ON a.dmisid = d.dmisid - GROUP BY a.epiweek, a.flu_type, d.state, d.country; - '''.format(dest_table, src_table) - try: - cursor = cnx.cursor() - cursor.execute(cmd) - cnx.commit() - finally: - cnx.close() - -def agg_by_region(src_table, dest_table): - print("Aggregating records by regions...") - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - cmd = ''' - CREATE TABLE {} - SELECT s.epiweek, s.flu_type, r.hhs, r.cen, sum(s.visit_sum) visit_sum - FROM {} s - LEFT JOIN state2region_table r - ON s.state = r.state - GROUP BY s.epiweek, s.flu_type, r.hhs, r.cen; - '''.format(dest_table, src_table) - try: - cursor = cnx.cursor() - cursor.execute(cmd) - cnx.commit() - finally: - cnx.close() - -def init_all_tables(datapath): - init_dmisid_table(os.path.join(datapath, "simple_DMISID_FY2018.csv")) - init_region_table(os.path.join(datapath, "state2region.csv")) - - periods = ["00to13", "13to17"] - for period in periods: - raw_table_name = 'afhsb_{}_raw'.format(period) - state_table_name = 'afhsb_{}_state'.format(period) - region_table_name = 'afhsb_{}_region'.format(period) - - init_raw_data(raw_table_name, os.path.join(datapath, "filled_{}.csv".format(period))) - agg_by_state(raw_table_name, state_table_name) - agg_by_region(state_table_name, region_table_name) - -def dangerously_drop_all_afhsb_tables(): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - try: - cursor = cnx.cursor() - cursor.execute(''' - DROP TABLE IF EXISTS `afhsb_00to13_raw`, - `afhsb_00to13_region`, - `afhsb_00to13_state`, - `afhsb_13to17_raw`, - `afhsb_13to17_region`, - `afhsb_13to17_state`, - `state2region_table`, - `dmisid_table`; - ''') - cnx.commit() # (might do nothing; each DROP commits itself anyway) - finally: - cnx.close() - -def run_cmd(cmd): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - try: - cursor = cnx.cursor() - cursor.execute(cmd) - cnx.commit() - finally: - cnx.close() diff --git a/src/acquisition/afhsb/afhsb_update.py b/src/acquisition/afhsb/afhsb_update.py deleted file mode 100644 index c5a8635c8..000000000 --- a/src/acquisition/afhsb/afhsb_update.py +++ /dev/null @@ -1,39 +0,0 @@ -# standard library -import argparse -import tempfile -import os -import stat -import shutil - -# first party -from . import afhsb_sql - -DEFAULT_DATAPATH = '/home/automation/afhsb_data' - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--datapath', action='store', type=str, default=DEFAULT_DATAPATH, help='filepath to directory containing csv files to input into database') - args = parser.parse_args() - # MariaDB appears to refuse to LOAD DATA INFILE except on files under - # /var/lib/mysql (which seems dedicated to its own files) or /tmp; create a - # temporary directory, make rwx for automation & rx for mysql user, copy in - # (or alternatively, symlink --- unimplemented) args.datapath to the - # temporary directory, then run init_all_tables on this temporary datapath. 
- # Set up temporary directory that will hold temporary datapath (initial - # permissions are very restrictive): - tmp_datapath_parent_dir = tempfile.mkdtemp() - os.chmod(tmp_datapath_parent_dir, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP) - shutil.chown(tmp_datapath_parent_dir, group="mysql_automation") - # (here, mysql_automation is a group with members {mysql,automation}) - tmp_datapath = os.path.join(tmp_datapath_parent_dir, "afhsb_data") - # Copy datapath to temporary datapath (initial permission of copy are - # permissive, but require directory access, which was set appropriately - # above): - shutil.copytree(args.datapath, tmp_datapath) - # Run init_all_tables on temporary datapath: - afhsb_sql.init_all_tables(tmp_datapath) - # (Temporary parent directory should be deleted automatically.) - - -if __name__ == '__main__': - main() From cdf3832cec31d3f96d0b00f62b48f9f5c171d55c Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:24 -0700 Subject: [PATCH 02/11] feat(afhsb): remove afhsb from Epidata Python client --- src/client/delphi_epidata.py | 56 ------------------------------------ 1 file changed, 56 deletions(-) diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index a56527357..654eba74c 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -394,62 +394,6 @@ def meta_norostat(auth): # Make the API call return Epidata._request(params) - # Fetch AFHSB data - @staticmethod - def afhsb(auth, locations, epiweeks, flu_types): - """Fetch AFHSB data (point data, no min/max).""" - # Check parameters - if auth is None or locations is None or epiweeks is None or flu_types is None: - raise Exception('`auth`, `locations`, `epiweeks` and `flu_types` are all required') - - loc_exception = 'Location parameter `{}` is invalid. Valid `location` parameters are: '\ - '`hhs[1-10]`, `cen[1-9]`, 2-letter state code or 3-letter country code.' - for location in locations: - location = location.lower() - if (location.startswith('hhs') or location.startswith('cen')): - prefix, postfix = location[:3], location[3:] - if (postfix.isnumeric()): - region_num = int(postfix) - if (region_num < 1 or region_num > 10 or (region_num == 10 and prefix == 'cen')): - raise Exception(loc_exception.format(location)) - else: - raise Exception(loc_exception.format(location)) - elif (len(location) < 2 or len(location) > 3): - raise Exception(loc_exception.format(location)) - - flu_exception = 'Flu-type parameters `{}` is invalid. Valid flu-type parameters are: '\ - '`flu1`, `flu2`, `flu3`, `ili`, `flu2-flu1`, `flu3-flu2`, `ili-flu3`.' 
- valid_flu_types = ['flu1', 'flu2', 'flu3', 'ili', 'flu2-flu1', 'flu3-flu2', 'ili-flu3'] - for flu_type in flu_types: - if (not flu_type in valid_flu_types): - raise Exception(flu_exception.format(flu_type)) - - # Set up request - params = { - 'endpoint': 'afhsb', - 'auth': auth, - 'locations': Epidata._list(locations), - 'epiweeks': Epidata._list(epiweeks), - 'flu_types': Epidata._list(flu_types) - } - # Make the API call - return Epidata._request(params) - - # Fetch AFHSB metadata - @staticmethod - def meta_afhsb(auth): - """Fetch AFHSB metadata.""" - # Check parameters - if auth is None: - raise Exception('`auth` is required') - # Set up request - params = { - 'endpoint': 'meta_afhsb', - 'auth': auth, - } - # Make the API call - return Epidata._request(params) - # Fetch NIDSS flu data @staticmethod def nidss_flu(regions, epiweeks, issues=None, lag=None): From 7a8ee39eb0c0fd1daefaac7c9c4f34ff534eebe6 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:30 -0700 Subject: [PATCH 03/11] feat(afhsb): remove afhsb from Epidata R client --- src/client/delphi_epidata.R | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 06e9c2209..627948cc2 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -371,39 +371,6 @@ Epidata <- (function() { return(.request(params)) } - # Fetch AFHSB data (point data, no min/max) - afhsb <- function(auth, locations, epiweeks, flu_types) { - # Check parameters - if(missing(auth) || missing(locations) || missing(epiweeks) || missing(flu_types)) { - stop('`auth`, `locations`, `epiweeks` and `flu_types` are all required') - } - # Set up request - params <- list( - endpoint = 'afhsb', - auth = auth, - locations = .list(locations), - epiweeks = .list(epiweeks), - flu_types = .list(flu_types) - ) - # Make the API call - return(.request(params)) - } - - # Fetch AFHSB metadata - meta_afhsb <- function(auth) { - # Check parameters - if(missing(auth)) { - stop('`auth` is required') - } - # Set up request - params <- list( - endpoint = 'meta_afhsb', - auth = auth - ) - # Make the API call - return(.request(params)) - } - # Fetch NIDSS flu data nidss.flu <- function(regions, epiweeks, issues, lag) { # Check parameters @@ -662,8 +629,6 @@ Epidata <- (function() { quidel = quidel, norostat = norostat, meta_norostat = meta_norostat, - afhsb = afhsb, - meta_afhsb = meta_afhsb, nidss.flu = nidss.flu, nidss.dengue = nidss.dengue, delphi = delphi, From 9ce66351aa811045e1ca38037ab6d669c0bcb3bb Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:53 -0700 Subject: [PATCH 04/11] feat(afhsb): remove afhsb endpoint from server --- src/server/endpoints/__init__.py | 4 - src/server/endpoints/afhsb.py | 114 ----------------------------- src/server/endpoints/meta_afhsb.py | 31 -------- 3 files changed, 149 deletions(-) delete mode 100644 src/server/endpoints/afhsb.py delete mode 100644 src/server/endpoints/meta_afhsb.py diff --git a/src/server/endpoints/__init__.py b/src/server/endpoints/__init__.py index b58692676..94f1de5b8 100644 --- a/src/server/endpoints/__init__.py +++ b/src/server/endpoints/__init__.py @@ -1,5 +1,4 @@ from . 
import ( - afhsb, cdc, covid_hosp_facility_lookup, covid_hosp_facility, @@ -19,7 +18,6 @@ ght, ilinet, kcdc_ili, - meta_afhsb, meta_norostat, meta, nidss_dengue, @@ -36,7 +34,6 @@ ) endpoints = [ - afhsb, cdc, covid_hosp_facility_lookup, covid_hosp_facility, @@ -56,7 +53,6 @@ ght, ilinet, kcdc_ili, - meta_afhsb, meta_norostat, meta, nidss_dengue, diff --git a/src/server/endpoints/afhsb.py b/src/server/endpoints/afhsb.py deleted file mode 100644 index a006defac..000000000 --- a/src/server/endpoints/afhsb.py +++ /dev/null @@ -1,114 +0,0 @@ -from typing import Dict, List - -from flask import Blueprint, request - -from .._params import extract_integers, extract_strings -from .._query import execute_queries, filter_integers, filter_strings -from .._validate import require_all -from .._security import require_role - -# first argument is the endpoint name -bp = Blueprint("afhsb", __name__) -alias = None - - -def _split_locations(locations: List[str]): - # split locations into national/regional/state - location_dict: Dict[str, List[str]] = { - "hhs": [], - "cen": [], - "state": [], - "country": [], - } - for location in locations: - location = location.lower() - if location[0:3] == "hhs": - location_dict["hhs"].append(location) - elif location[0:3] == "cen": - location_dict["cen"].append(location) - elif len(location) == 3: - location_dict["country"].append(location) - elif len(location) == 2: - location_dict["state"].append(location) - return location_dict - - -def _split_flu_types(flu_types: List[str]): - # split flu types into disjoint/subset - disjoint_flus = [] - subset_flus = [] - for flu_type in flu_types: - if flu_type in ["flu1", "flu2-flu1", "flu3-flu2", "ili-flu3"]: - disjoint_flus.append(flu_type) - elif flu_type in ["flu2", "flu3", "ili"]: - subset_flus.append(flu_type) - return disjoint_flus, subset_flus - - -FLU_MAPPING = { - "flu2": ["flu1", "flu2-flu1"], - "flu3": ["flu1", "flu2-flu1", "flu3-flu2"], - "ili": ["flu1", "flu2-flu1", "flu3-flu2", "ili-flu3"], -} - - -@bp.route("/", methods=("GET", "POST")) -@require_role("afhsb") -def handle(): - require_all(request, "locations", "epiweeks", "flu_types") - - locations = extract_strings("locations") - epiweeks = extract_integers("epiweeks") - flu_types = extract_strings("flu_types") - - disjoint_flus, subset_flus = _split_flu_types(flu_types) - location_dict = _split_locations(locations) - - # build query - - queries = [] - for location_type, loc in location_dict.items(): - if not loc: - continue - table = ( - "afhsb_00to13_region" - if location_type in ["hhs", "cen"] - else "afhsb_00to13_state" - ) - fields = ( - f"`epiweek`, `{location_type}` `location`, sum(`visit_sum`) `visit_sum`" - ) - group = "`epiweek`, `location`" - order = "`epiweek` ASC, `location` ASC" - # build the filter - params = dict() - # build the epiweek filter - condition_epiweek = filter_integers("nd.`epiweek`", epiweeks, "epiweek", params) - condition_location = filter_strings(location_type, locations, "loc", params) - - for subset_flu in subset_flus: - flu_params = params.copy() - condition_flu = filter_strings( - "`flu_type`", FLU_MAPPING[subset_flu], "flu_type", flu_params - ) - query = f"""SELECT {fields}, '{subset_flu}' `flu_type` FROM {table} - WHERE ({condition_epiweek}) AND ({condition_location}) AND ({condition_flu}) - GROUP BY {group} ORDER BY {order}""" - queries.append((query, flu_params)) - # disjoint flu types: flu1, flu2-flu1, flu3-flu2, ili-flu3 - if disjoint_flus: - flu_params = params.copy() - condition_flu = filter_strings( - "`flu_type`", 
disjoint_flus, "flu_type", flu_params - ) - query = f"""SELECT {fields}, `flu_type` FROM {table} - WHERE ({condition_epiweek}) AND ({condition_location}) AND ({condition_flu}) - GROUP BY {group},`flu_type` ORDER BY {order},`flu_type`""" - queries.append((query, flu_params)) - - fields_string = ["location", "flu_type"] - fields_int = ["epiweek", "visit_sum"] - fields_float = [] - - # send query - return execute_queries(queries, fields_string, fields_int, fields_float) diff --git a/src/server/endpoints/meta_afhsb.py b/src/server/endpoints/meta_afhsb.py deleted file mode 100644 index 096ab58ec..000000000 --- a/src/server/endpoints/meta_afhsb.py +++ /dev/null @@ -1,31 +0,0 @@ -from flask import Blueprint, request - -from .._printer import print_non_standard -from .._query import parse_result -from .._security import require_role - - -# first argument is the endpoint name -bp = Blueprint("meta_afhsb", __name__) -alias = None - - -@bp.route("/", methods=("GET", "POST")) -@require_role("afhsb") -def handle(): - # build query - table1 = "afhsb_00to13_state" - table2 = "afhsb_13to17_state" - - string_keys = ["state", "country"] - int_keys = ["flu_severity"] - data = dict() - - for key in string_keys: - query = f"SELECT DISTINCT `{key}` FROM (select `{key}` from `{table1}` union select `{key}` from `{table2}`) t" - data[key] = parse_result(query, {}, [key]) - for key in int_keys: - query = f"SELECT DISTINCT `{key}` FROM (select `{key}` from `{table1}` union select `{key}` from `{table2}`) t" - data[key] = parse_result(query, {}, [], [key]) - - return print_non_standard(request.values.get("format"), data) From 2878980e4b4810f5594b9bf7a0afb539089d67d9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:33:33 -0700 Subject: [PATCH 05/11] feat(afhsb): remove afhsb from deploy.json --- deploy.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/deploy.json b/deploy.json index 59d141ba4..654d669cc 100644 --- a/deploy.json +++ b/deploy.json @@ -174,15 +174,6 @@ "add-header-comment": true }, - "// acquisition - afhsb", - { - "type": "move", - "src": "src/acquisition/afhsb/", - "dst": "[[package]]/acquisition/afhsb/", - "match": "^.*\\.(py)$", - "add-header-comment": true - }, - "// acquisition - covidcast", { "type": "move", From d554566cbb323a3ffced9690e5dabdff41c387d4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:33:59 -0700 Subject: [PATCH 06/11] feat(afhsb): remove afhsb from setup.cfg --- dev/local/setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index 443359b25..d7383ade1 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -6,7 +6,6 @@ version = 4.1.3 packages = delphi.epidata delphi.epidata.acquisition - delphi.epidata.acquisition.afhsb delphi.epidata.acquisition.cdcp delphi.epidata.acquisition.covid_hosp delphi.epidata.acquisition.covid_hosp.common From 316ecd18f27776b465f738db66ba2c628cd2803d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:34:13 -0700 Subject: [PATCH 07/11] feat(afhsb): remove norostat from setup.cfg --- dev/local/setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index d7383ade1..69bc91778 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -20,7 +20,6 @@ packages = delphi.epidata.acquisition.ght delphi.epidata.acquisition.kcdc delphi.epidata.acquisition.nidss - delphi.epidata.acquisition.norostat delphi.epidata.acquisition.paho delphi.epidata.acquisition.quidel 
delphi.epidata.acquisition.twtr From 5a6cefc004d45a45935436c93a3ba4ed636787df Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:34:29 -0700 Subject: [PATCH 08/11] docs(afhsb): remove afhsb and its meta --- docs/api/afhsb.md | 52 ------------------------------------------ docs/api/meta_afhsb.md | 49 --------------------------------------- 2 files changed, 101 deletions(-) delete mode 100644 docs/api/afhsb.md delete mode 100644 docs/api/meta_afhsb.md diff --git a/docs/api/afhsb.md b/docs/api/afhsb.md deleted file mode 100644 index d53ad643e..000000000 --- a/docs/api/afhsb.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: AFHSB -parent: Other Endpoints (COVID-19 and Other Diseases) ---- - -# AFHSB - -This is the API documentation for accessing the AFHSB (`afhsb`) endpoint of -[Delphi](https://delphi.cmu.edu/)'s epidemiological data. - -General topics not specific to any particular endpoint are discussed in the -[API overview](README.md). Such topics include: -[contributing](README.md#contributing), [citing](README.md#citing), and -[data licensing](README.md#data-licensing). - -## AFHSB Data - -... - -# The API - -The base URL is: https://api.delphi.cmu.edu/epidata/afhsb/ - -See [this documentation](README.md) for details on specifying epiweeks, dates, and lists. - -## Parameters - -### Required - -| Parameter | Description | Type | -| --- | --- | --- | -| `auth` | password | string | -| `epiweeks` | epiweeks | `list` of epiweeks | -| `locations` | locations | `list` of [region](https://github.com/cmu-delphi/delphi-epidata/blob/main/labels/regions.txt), [state](https://github.com/cmu-delphi/delphi-epidata/blob/main/labels/states.txt), or 3-letter country code labels | -| `flu_types` | flu types | `list` of disjoint (`flu1`, `flu2-flu1`, `flu3-flu2`, `ili-flu3`) or subset (`flu2`, `flu3`, `ili`) flu type labels | - -## Response - -| Field | Description | Type | -|-----------|-----------------------------------------------------------------|------------------| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| ... | ... | ... | -| `message` | `success` or error message | string | - -# Example URLs - - - -# Code Samples - - diff --git a/docs/api/meta_afhsb.md b/docs/api/meta_afhsb.md deleted file mode 100644 index 6ba294772..000000000 --- a/docs/api/meta_afhsb.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: AFHSB Metadata -parent: Other Endpoints (COVID-19 and Other Diseases) ---- - -# AFHSB Metadata - -This is the documentation of the API for accessing the AFHSB Metadata (`meta_afhsb`) endpoint of -the [Delphi](https://delphi.cmu.edu/)'s epidemiological data. - -General topics not specific to any particular endpoint are discussed in the -[API overview](README.md). Such topics include: -[contributing](README.md#contributing), [citing](README.md#citing), and -[data licensing](README.md#data-licensing). - -## AFHSB Metadata - -... - -# The API - -The base URL is: https://api.delphi.cmu.edu/epidata/meta_afhsb/ - -See [this documentation](README.md) for details on specifying epiweeks, dates, and lists. 
- -## Parameters - -### Required - -| Parameter | Description | Type | -|-----------|-------------|--------| -| `auth` | password | string | - -## Response - -| Field | Description | Type | -|-----------|-----------------------------------------------------------------|------------------| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| ... | ... | ... | -| `message` | `success` or error message | string | - -# Example URLs - - - -# Code Samples - - From 7a2fc7b0c53d944c03ce4d2e878a216814e10a24 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:34:39 -0700 Subject: [PATCH 09/11] docs(afhsb): remove afhsb from README --- docs/api/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/api/README.md b/docs/api/README.md index dd8f98d5c..709d068e0 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -110,7 +110,6 @@ The parameters available for each source are documented in each linked source-sp | Endpoint | Name | Description | Restricted? | | --- | --- | --- | --- | -| [`afhsb`](ahfsb.md) | AFHSB | ... | yes | | [`cdc`](cdc.md) | CDC Page Hits | ... | yes | | [`delphi`](delphi.md) | Delphi's Forecast | ... | no | | [`ecdc_ili`](ecdc_ili.md) | ECDC ILI | ECDC ILI data from the ECDC website. | no | @@ -122,7 +121,6 @@ The parameters available for each source are documented in each linked source-sp | [`ght`](ght.md) | Google Health Trends | Estimate of influenza activity based on volume of certain search queries. | yes | | [`kcdc_ili`](kcdc_ili.md) | KCDC ILI | KCDC ILI data from KCDC website. | no | | [`meta`](meta.md) | API Metadata | Metadata for `fluview`, `twitter`, `wiki`, and `delphi`. | no | -| [`meta_afhsb`](meta_afhsb.md) | AFHSB Metadata | ... | yes | | [`nidss_flu`](nidss_flu.md) | NIDSS Flu | Outpatient ILI from Taiwan's National Infectious Disease Statistics System (NIDSS). | no | | [`nowcast`](nowcast.md) | ILI Nearby | A nowcast of U.S. national, regional, and state-level (weighted) percent ILI, available seven days (regionally) or five days (state-level) before the first ILINet report for the corresponding week. | no | | [`quidel`](quidel.md) | Quidel | Data provided by Quidel Corp., which contains flu lab test results. 
| yes | From 92b546f3129e028153ea3ce6be8d38a0a7f10b23 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:35:03 -0700 Subject: [PATCH 10/11] feat(afhsb): remove afhsb from the js client --- src/client/delphi_epidata.js | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index cf06ae976..117fe8949 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -123,19 +123,6 @@ version: () => { return _request('version', {}).then((r) => Object.assign(r, {client_version})); }, - /** - * Fetch AFHSB data (point data, no min/max) - */ - afhsb: (auth, locations, epiweeks, flu_types) => { - requireAll({ auth, locations, epiweeks, flu_types }); - const params = { - auth, - locations: _list(locations), - epiweeks: _list(epiweeks), - flu_types: _list(flu_types), - }; - return _request("afhsb", params); - }, /** * Fetch CDC page hits */ @@ -387,16 +374,6 @@ }; return _request("kcdc_ili", params); }, - /** - * Fetch AFHSB metadata - */ - meta_afhsb: (auth) => { - requireAll({ auth }); - const params = { - auth, - }; - return _request("meta_afhsb", params); - }, /** * Fetch NoroSTAT metadata */ From fa0bd53a095096e775d5545f4cb11a229a34a0cc Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 26 Jun 2023 09:40:51 -0700 Subject: [PATCH 11/11] feat(afhsb): remove afhsb from .ts file --- src/client/delphi_epidata.d.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/client/delphi_epidata.d.ts b/src/client/delphi_epidata.d.ts index f88b18247..0b81db779 100644 --- a/src/client/delphi_epidata.d.ts +++ b/src/client/delphi_epidata.d.ts @@ -20,7 +20,6 @@ declare module 'delphi_epidata' { client_version: string; version(): Promise<{version: string, client_version: string}>; - afhsb(callback: EpiDataCallback, auth: string, locations: StringParam, epiweeks: EpiRangeParam, flu_types: StringParam): Promise; cdc(callback: EpiDataCallback, auth: string, epiweeks: EpiRangeParam, locations: StringParam): Promise; covid_hosp_facility(callback: EpiDataCallback, hospital_pks: StringParam, collection_weeks: EpiRangeParam, publication_dates: EpiRangeParam): Promise; covid_hosp_facility_lookup(callback: EpiDataCallback, state?: string, ccn?: string, city?: string, zip?: string, fips_code?: string): Promise; @@ -37,7 +36,6 @@ declare module 'delphi_epidata' { gft(callback: EpiDataCallback, locations: StringParam, epiweeks: EpiRangeParam): Promise; ght(callback: EpiDataCallback, auth: string, locations: StringParam, epiweeks: EpiRangeParam, query: string): Promise; kcdc_ili(callback: EpiDataCallback, regions: StringParam, epiweeks: EpiRangeParam, issues?: EpiRangeParam, lag?: number): Promise; - meta_afhsb(callback: EpiDataCallback, auth: string): Promise; meta_norostat(callback: EpiDataCallback, auth: string): Promise; meta(callback: EpiDataCallback): Promise; nidss_dengue(callback: EpiDataCallback, locations: StringParam, epiweeks: EpiRangeParam): Promise; @@ -61,7 +59,6 @@ declare module 'delphi_epidata' { client_version: string; version(): Promise<{ version: string, client_version: string }>; - afhsb(auth: string, locations: StringParam, epiweeks: EpiRangeParam, flu_types: StringParam): Promise; cdc(auth: string, epiweeks: EpiRangeParam, locations: StringParam): Promise; covid_hosp_facility(hospital_pks: StringParam, collection_weeks: EpiRangeParam, publication_dates: EpiRangeParam): Promise; covid_hosp_facility_lookup(state?: string, ccn?: string, city?: string, zip?: string, fips_code?: 
string): Promise; @@ -78,7 +75,6 @@ declare module 'delphi_epidata' { gft(locations: StringParam, epiweeks: EpiRangeParam): Promise; ght(auth: string, locations: StringParam, epiweeks: EpiRangeParam, query: string): Promise; kcdc_ili(regions: StringParam, epiweeks: EpiRangeParam, issues?: EpiRangeParam, lag?: number): Promise; - meta_afhsb(auth: string): Promise; meta_norostat(auth: string): Promise; meta(callback: EpiDataCallback): Promise; nidss_dengue(locations: StringParam, epiweeks: EpiRangeParam): Promise;
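
Post-merge cleanup note: PATCH 01/11 deletes dangerously_drop_all_afhsb_tables() along with the rest of afhsb_sql.py, so any afhsb_* tables still sitting in the epidata database have to be dropped by hand once this series lands. A minimal sketch of that cleanup, mirroring the removed helper (the drop_leftover_afhsb_tables name is ours, and it assumes the same delphi.operations.secrets layout the deleted module used):

import mysql.connector as connector
import delphi.operations.secrets as secrets

def drop_leftover_afhsb_tables():
    (u, p) = secrets.db.epi
    cnx = connector.connect(user=u, passwd=p, database="epidata")
    try:
        cursor = cnx.cursor()
        # Same table list as the helper removed in afhsb_sql.py.
        cursor.execute('''
            DROP TABLE IF EXISTS `afhsb_00to13_raw`,
                `afhsb_00to13_region`,
                `afhsb_00to13_state`,
                `afhsb_13to17_raw`,
                `afhsb_13to17_region`,
                `afhsb_13to17_state`,
                `state2region_table`,
                `dmisid_table`;
        ''')
        cnx.commit()  # each DROP auto-commits; kept for symmetry with the original
    finally:
        cnx.close()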
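
Downstream impact: after PATCH 02/11, 03/11, 10/11, and 11/11, afhsb and meta_afhsb no longer exist on the Python, R, JavaScript, and TypeScript clients, and the server endpoint itself is gone as of PATCH 04/11. Old scripts therefore fail at call time (an AttributeError in Python, a missing-function error in R) rather than receiving a server response. For scripts that may run against either client version, a defensive check along these lines works; it assumes the client is installed as the delphi_epidata package, and the auth, location, epiweek, and flu-type values below are placeholders:

from delphi_epidata import Epidata

if hasattr(Epidata, "afhsb"):
    # Old client: signature was afhsb(auth, locations, epiweeks, flu_types).
    res = Epidata.afhsb("secret-auth", ["hhs1"], [201740], ["flu1"])
else:
    raise RuntimeError("the afhsb endpoint was removed from the Epidata API and clients")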
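
One data-model detail worth recording before it disappears with the docs: the afhsb_* tables stored only the disjoint flu categories (flu1, flu2-flu1, flu3-flu2, ili-flu3), and the server endpoint deleted in PATCH 04/11 rebuilt the cumulative types from them via its FLU_MAPPING constant. Anyone keeping the filled_*.csv archives produced by the removed acquisition code can do the same reconstruction; a sketch under that assumption, with FLU_MAPPING copied verbatim from the deleted afhsb.py:

FLU_MAPPING = {
    "flu2": ["flu1", "flu2-flu1"],
    "flu3": ["flu1", "flu2-flu1", "flu3-flu2"],
    "ili": ["flu1", "flu2-flu1", "flu3-flu2", "ili-flu3"],
}

def subset_visit_sum(disjoint_counts, flu_type):
    """Rebuild a cumulative flu type (flu2, flu3, or ili) from disjoint band
    counts, e.g. {"flu1": 3, "flu2-flu1": 1, "flu3-flu2": 4, "ili-flu3": 2}."""
    return sum(disjoint_counts.get(band, 0) for band in FLU_MAPPING[flu_type])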