|
| 1 | +"""Functions to read NREL MIDC data. |
| 2 | +""" |
| 3 | +from functools import partial |
| 4 | +import pandas as pd |
| 5 | + |
# VARIABLE_MAP maps the leading part of a MIDC field name (the measured
# variable) to the pvlib variable name that replaces it. Keys are matched
# as prefixes of the column labels by map_midc_to_pvlib; see the Notes
# section of the read_midc docstring for how to build a custom map.

VARIABLE_MAP = {
    # irradiance components
    'Direct': 'dni',
    'Global': 'ghi',
    'Diffuse': 'dhi',
    # solar geometry
    'Airmass': 'airmass',
    'Azimuth Angle': 'solar_azimuth',
    'Zenith Angle': 'solar_zenith',
    # meteorological measurements
    'Air Temperature': 'temp_air',
    'Temperature': 'temp_air',
    'Dew Point Temp': 'temp_dew',
    'Relative Humidity': 'relative_humidity',
}
| 21 | + |
# TZ_MAP translates timezone labels found in MIDC column headers that
# cannot be used directly for localization into their fixed-offset
# 'Etc/GMT' equivalents. Labels absent from this map are used as-is by
# format_index and format_index_raw.

TZ_MAP = {
    'PST': 'Etc/GMT+8',
    'CST': 'Etc/GMT+6',
}
| 28 | + |
| 29 | + |
def map_midc_to_pvlib(variable_map, field_name):
    """A mapper function to rename Dataframe columns to their pvlib counterparts.

    Parameters
    ----------
    variable_map: Dictionary
        A dictionary for mapping MIDC field name to pvlib name. See
        VARIABLE_MAP for default value and description of how to construct
        this argument.
    field_name: string
        The Column to map.

    Returns
    -------
    label: string
        The pvlib variable name associated with the MIDC field or the input if
        a mapping does not exist.

    Notes
    -----
    The first key of `variable_map` that `field_name` starts with is used.
    The text between that key and the units (the part starting at the first
    '[') is treated as the instrument name and appended to the pvlib name
    with spaces replaced by underscores. A field without a bracketed units
    section is mapped using everything after the key as the instrument.
    """
    for midc_name, pvlib_name in variable_map.items():
        if not field_name.startswith(midc_name):
            continue
        # Whatever follows the variable name is '<instrument> [<units>]'.
        instrument = field_name[len(midc_name):]
        units_index = instrument.find('[')
        # str.find returns -1 when there are no units; only slice when a
        # bracket is really present so the instrument is never truncated.
        if units_index != -1:
            instrument = instrument[:units_index]
        # Drop the separator between the instrument and the units (or any
        # trailing padding) so no dangling underscore is produced.
        instrument = instrument.rstrip()
        return pvlib_name + instrument.replace(' ', '_')
    # No key matched: leave the label untouched.
    return field_name
| 65 | + |
| 66 | + |
def format_index(data):
    """Index the Dataframe by timestamps built from its date and time columns.

    The label of the second column doubles as the timezone name; it is
    translated through TZ_MAP when an entry exists and used unchanged
    otherwise.

    Parameters
    ----------
    data: Dataframe
        Must contain a 'DATE (MM/DD/YYYY)' column. The second column must be
        labeled with the timezone and hold times in 'HH:MM' format.

    Returns
    -------
    data: Dataframe
        Dataframe with a DatetimeIndex localized to the provided timezone.
    """
    tz_label = data.columns[1]
    # Date and time strings are concatenated without a separator, which is
    # why the parse format has none between '%Y' and '%H'.
    stamps = pd.to_datetime(data['DATE (MM/DD/YYYY)'] + data[tz_label],
                            format='%m/%d/%Y%H:%M')
    indexed = data.set_index(stamps)
    return indexed.tz_localize(TZ_MAP.get(tz_label, tz_label))
| 89 | + |
| 90 | + |
def format_index_raw(data):
    """Index a raw-format Dataframe by timestamps built from year, day of
    year and time of day.

    The label of the column at position 3 doubles as the timezone name; it
    is translated through TZ_MAP when an entry exists and used unchanged
    otherwise.

    Parameters
    ----------
    data: Dataframe
        Must contain columns 'Year' and 'DOY'. The column at position 3
        (zero-based) must be labeled with the timezone and hold the time of
        day as an integer that reads as HHMM once zero-padded to four
        digits.

    Returns
    -------
    data: Dataframe
        The data with a DatetimeIndex localized to the provided timezone.
    """
    tz_label = data.columns[3]
    # Zero-pad day-of-year and time so the concatenated string has the fixed
    # width expected by the '%Y%j%H%M' parse format.
    year_text = data['Year'].apply(str)
    doy_text = data['DOY'].apply('{:03d}'.format)
    time_text = data[tz_label].apply('{:04d}'.format)
    stamps = pd.to_datetime(year_text + doy_text + time_text,
                            format="%Y%j%H%M")
    indexed = data.set_index(stamps)
    return indexed.tz_localize(TZ_MAP.get(tz_label, tz_label))
| 115 | + |
| 116 | + |
def read_midc(filename, variable_map=VARIABLE_MAP, raw_data=False):
    """Read in National Renewable Energy Laboratory Measurement and
    Instrumentation Data Center [1]_ weather data.

    The file is parsed as CSV, indexed by time with either format_index or
    format_index_raw, and its columns are renamed with map_midc_to_pvlib.

    Parameters
    ----------
    filename: string
        Filename or url of data to read.
    variable_map: dictionary
        Dictionary for mapping MIDC field names to pvlib names. See variable
        `VARIABLE_MAP` for default and Notes section below for a description of
        its format.
    raw_data: boolean
        Set to true to use format_index_raw to correctly format the date/time
        columns of MIDC raw data files.

    Returns
    -------
    data: Dataframe
        A dataframe with DatetimeIndex localized to the provided timezone.

    Notes
    -----
    Keys of the `variable_map` dictionary should include the first part
    of a MIDC field name which indicates the variable being measured.

        e.g. 'Global PSP [W/m^2]' is entered as a key of 'Global'

    The 'PSP' indicating instrument is appended to the pvlib variable name
    after mapping to differentiate measurements of the same variable. For a
    full list of pvlib variable names see the `Variable Style Rules
    <https://pvlib-python.readthedocs.io/en/latest/variables_style_rules.html>`_.

    Be sure to check the units for the variables you will use on the
    `MIDC site <https://midcdmz.nrel.gov/>`_.

    References
    ----------
    .. [1] NREL: Measurement and Instrumentation Data Center
        `https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
    """
    frame = pd.read_csv(filename)
    # Raw files encode time as Year/DOY/HHMM columns, standard files as
    # date and 'HH:MM' strings, so each needs its own index builder.
    build_index = format_index_raw if raw_data else format_index
    frame = build_index(frame)
    rename_column = partial(map_midc_to_pvlib, variable_map)
    return frame.rename(columns=rename_column)
| 166 | + |
| 167 | + |
def read_midc_raw_data_from_nrel(site, start, end):
    """Request and read MIDC data directly from the raw data api.

    Parameters
    ----------
    site: string
        The MIDC station id.
    start: datetime
        Start date for requested data.
    end: datetime
        End date for requested data.

    Returns
    -------
    data:
        Dataframe with DatetimeIndex localized to the station location.

    Notes
    -----
    Requests spanning an instrumentation change will yield an error. See the
    MIDC raw data api page here_ for more details and considerations.

    .. _here: https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist
    """
    # The api takes dates as YYYYMMDD; time-of-day on start/end is dropped.
    query = '&'.join([
        'site={}'.format(site),
        'begin={}'.format(start.strftime('%Y%m%d')),
        'end={}'.format(end.strftime('%Y%m%d')),
    ])
    url = 'https://midcdmz.nrel.gov/apps/data_api.pl?' + query
    return read_midc(url, raw_data=True)
0 commit comments