Skip to content

Commit c165e94

Browse files
lboemanwholmgren
authored andcommitted
Add MIDC reader (#605)
* Add MIDC reader * fix comment style on import * typo in tests * correct rename call for compatibility with older pandas versions * Add link to variable style rules in read_midc docstring * add default variable mapping, update what's new * style fix * documentation update * fix linting errors * add test case to cover no-alter case of mapper * add raw data api index formatting and helper function for querying midc servers for data * style fixed in test_midc.py * test updates * add timezone mapping for PST, CST * typo correction
1 parent 7057afc commit c165e94

File tree

7 files changed

+3157
-0
lines changed

7 files changed

+3157
-0
lines changed

docs/sphinx/source/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,8 @@ relevant to solar energy modeling.
322322
iotools.read_srml
323323
iotools.read_srml_month_from_solardat
324324
iotools.read_surfrad
325+
iotools.read_midc
326+
iotools.read_midc_raw_data_from_nrel
325327

326328
A :py:class:`~pvlib.location.Location` object may be created from metadata
327329
in some files.

docs/sphinx/source/whatsnew/v0.6.1.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ Enhancements
3636
:py:func:`pvlib.iotools.read_srml_month_from_solardat` to read University of
3737
Oregon Solar Radiation Monitoring Laboratory data. (:issue:`589`)
3838
* Created :py:func:`pvlib.iotools.read_surfrad` to read NOAA SURFRAD data. (:issue:`590`)
39+
* Created :py:func:`pvlib.iotools.read_midc` and :py:func:`pvlib.iotools.read_midc_raw_data_from_nrel`
40+
to read NREL MIDC data. (:issue:`601`)
3941

4042
Bug fixes
4143
~~~~~~~~~

pvlib/data/midc_20181014.txt

Lines changed: 1441 additions & 0 deletions
Large diffs are not rendered by default.

pvlib/data/midc_raw_20181018.txt

Lines changed: 1441 additions & 0 deletions
Large diffs are not rendered by default.

pvlib/iotools/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33
from pvlib.iotools.srml import read_srml # noqa: F401
44
from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401
55
from pvlib.iotools.surfrad import read_surfrad # noqa: F401
6+
from pvlib.iotools.midc import read_midc # noqa: F401
7+
from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401

pvlib/iotools/midc.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""Functions to read NREL MIDC data.
2+
"""
3+
from functools import partial
4+
import pandas as pd
5+
6+
# Default mapping from the leading part of a MIDC field name to the pvlib
# variable name that replaces it. The docstring of read_midc describes how
# the keys are matched against column labels.
VARIABLE_MAP = dict([
    ('Direct', 'dni'),
    ('Global', 'ghi'),
    ('Diffuse', 'dhi'),
    ('Airmass', 'airmass'),
    ('Azimuth Angle', 'solar_azimuth'),
    ('Zenith Angle', 'solar_zenith'),
    ('Air Temperature', 'temp_air'),
    ('Temperature', 'temp_air'),
    ('Dew Point Temp', 'temp_dew'),
    ('Relative Humidity', 'relative_humidity'),
])

# Timezone labels that are problematic to parse, replaced by their
# fixed-offset 'Etc/GMT' names before the index is localized.
TZ_MAP = dict(
    PST='Etc/GMT+8',
    CST='Etc/GMT+6',
)
28+
29+
30+
def map_midc_to_pvlib(variable_map, field_name):
    """Rename a MIDC column label to its pvlib counterpart.

    Parameters
    ----------
    variable_map: Dictionary
        A dictionary mapping the leading part of a MIDC field name to the
        pvlib name that should replace it.
    field_name: string
        The column label to map.

    Returns
    -------
    label: string
        The pvlib variable name followed by the instrument part of the MIDC
        field (spaces replaced by underscores, units removed), or
        `field_name` unchanged if no key of `variable_map` is a prefix of it.

    Notes
    -----
    The first key of `variable_map` that `field_name` starts with wins.
    Everything from the first '[' onward is treated as the units and dropped.
    A field without a bracketed units section is mapped using all of the
    text that follows the matched key.
    """
    for midc_name, pvlib_name in variable_map.items():
        if not field_name.startswith(midc_name):
            continue
        # What follows the variable name is "<instrument> [<units>]".
        instrument_units = field_name[len(midc_name):]
        # partition() returns the whole string when there is no '[', so a
        # header without units no longer loses its trailing characters.
        instrument = instrument_units.partition('[')[0].rstrip()
        return pvlib_name + instrument.replace(' ', '_')
    return field_name
65+
66+
67+
def format_index(data):
    """Index the Dataframe by timestamps built from its date and time columns.

    Parameters
    ----------
    data: Dataframe
        Must contain a 'DATE (MM/DD/YYYY)' column. The label of the second
        column names the timezone and its values are times in 'HH:MM'
        format.

    Returns
    -------
    data: Dataframe
        The input data with a DatetimeIndex localized to the timezone taken
        from the second column's label (translated through TZ_MAP when the
        label has an entry there).
    """
    time_label = data.columns[1]
    # The date and time strings are concatenated without a separator, so the
    # parse format has none between '%Y' and '%H' either.
    stamps = pd.to_datetime(data['DATE (MM/DD/YYYY)'] + data[time_label],
                            format='%m/%d/%Y%H:%M')
    indexed = data.set_index(stamps)
    return indexed.tz_localize(TZ_MAP.get(time_label, time_label))
89+
90+
91+
def format_index_raw(data):
    """Index a raw-format Dataframe by timestamps built from year, day of
    year and time columns.

    Parameters
    ----------
    data: Dataframe
        Must contain integer columns 'Year' and 'DOY'. The label of the
        column at position 3 (the fourth column) names the timezone, and its
        values are integer times that zero-pad to 'HHMM'.

    Returns
    -------
    data: Dataframe
        The input data with a DatetimeIndex localized to the timezone taken
        from that column's label (translated through TZ_MAP when the label
        has an entry there).
    """
    tz_label = data.columns[3]
    # Zero-pad each piece so the concatenation is a fixed-width
    # 'YYYYDDDHHMM' string that matches the parse format below.
    year_part = data.Year.apply(str)
    doy_part = data.DOY.apply('{:03d}'.format)
    time_part = data[tz_label].apply('{:04d}'.format)
    stamps = pd.to_datetime(year_part + doy_part + time_part,
                            format="%Y%j%H%M")
    indexed = data.set_index(stamps)
    return indexed.tz_localize(TZ_MAP.get(tz_label, tz_label))
115+
116+
117+
def read_midc(filename, variable_map=VARIABLE_MAP, raw_data=False):
    """Read National Renewable Energy Laboratory Measurement and
    Instrumentation Data Center [1]_ weather data.

    Parameters
    ----------
    filename: string
        Filename or url of data to read.
    variable_map: dictionary
        Dictionary mapping MIDC field names to pvlib names. Defaults to
        `VARIABLE_MAP`; see the Notes section below for a description of its
        format.
    raw_data: boolean
        Set to true to build the index with format_index_raw, which handles
        the date/time columns of MIDC raw data files. Otherwise
        format_index is used.

    Returns
    -------
    data: Dataframe
        A dataframe with DatetimeIndex localized to the provided timezone
        and with columns renamed according to `variable_map`.

    Notes
    -----
    Keys of the `variable_map` dictionary should include the first part
    of a MIDC field name which indicates the variable being measured.

        e.g. 'Global PSP [W/m^2]' is entered as a key of 'Global'

    The 'PSP' indicating instrument is appended to the pvlib variable name
    after mapping to differentiate measurements of the same variable. For a
    full list of pvlib variable names see the `Variable Style Rules
    <https://pvlib-python.readthedocs.io/en/latest/variables_style_rules.html>`_.

    Be sure to check the units for the variables you will use on the
    `MIDC site <https://midcdmz.nrel.gov/>`_.

    References
    ----------
    .. [1] NREL: Measurement and Instrumentation Data Center
        `https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
    """
    frame = pd.read_csv(filename)
    # Raw-api files and standard files lay out their date/time columns
    # differently, so each needs its own index builder.
    build_index = format_index_raw if raw_data else format_index
    frame = build_index(frame)
    rename_column = partial(map_midc_to_pvlib, variable_map)
    return frame.rename(columns=rename_column)
166+
167+
168+
def read_midc_raw_data_from_nrel(site, start, end):
    """Request and read MIDC data directly from the raw data api.

    Parameters
    ----------
    site: string
        The MIDC station id.
    start: datetime
        Start date for requested data.
    end: datetime
        End date for requested data.

    Returns
    -------
    data:
        Dataframe with DatetimeIndex localized to the station location.

    Notes
    -----
    Requests spanning an instrumentation change will yield an error. See the
    MIDC raw data api page here_ for more details and considerations.

    .. _here: https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist
    """
    # Dates are sent as 'YYYYMMDD' strings in the query.
    query = {'site': site,
             'begin': start.strftime('%Y%m%d'),
             'end': end.strftime('%Y%m%d')}
    pairs = ('{}={}'.format(key, value) for key, value in query.items())
    url = 'https://midcdmz.nrel.gov/apps/data_api.pl?' + '&'.join(pairs)
    return read_midc(url, raw_data=True)

pvlib/test/test_midc.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import inspect
2+
import os
3+
4+
import pandas as pd
5+
from pandas.util.testing import network
6+
import pytest
7+
import pytz
8+
9+
from pvlib.iotools import midc
10+
11+
12+
# Locate the fixture files relative to this test module so the tests do not
# depend on the current working directory.
test_dir = os.path.abspath(inspect.getfile(inspect.currentframe()))
test_dir = os.path.dirname(test_dir)
midc_testfile = os.path.join(test_dir, '../data/midc_20181014.txt')
midc_raw_testfile = os.path.join(test_dir, '../data/midc_raw_20181018.txt')
midc_network_testfile = (
    'https://midcdmz.nrel.gov/apps/data_api.pl'
    '?site=UAT&begin=20181018&end=20181019'
)
18+
19+
20+
@pytest.mark.parametrize(
    'field_name,expected',
    [
        ('Temperature @ 2m [deg C]', 'temp_air_@_2m'),
        ('Global PSP [W/m^2]', 'ghi_PSP'),
        ('Temperature @ 50m [deg C]', 'temp_air_@_50m'),
        # A field with no matching key must come back unchanged.
        ('Other Variable [units]', 'Other Variable [units]'),
    ],
)
def test_read_midc_mapper_function(field_name, expected):
    """The default map renames known fields and leaves unknown ones alone."""
    mapped = midc.map_midc_to_pvlib(midc.VARIABLE_MAP, field_name)
    assert mapped == expected
28+
29+
30+
def test_midc_format_index():
    """format_index gives a DatetimeIndex whose first and last entries are
    the expected MST timestamps for the sample file."""
    data = pd.read_csv(midc_testfile)
    data = midc.format_index(data)
    start = pd.Timestamp("20181014 00:00")
    start = start.tz_localize("MST")
    end = pd.Timestamp("20181014 23:59")
    end = end.tz_localize("MST")
    # isinstance is the idiomatic type check; comparing type() objects with
    # == is flagged by linters.
    assert isinstance(data.index, pd.DatetimeIndex)
    assert data.index[0] == start
    assert data.index[-1] == end
40+
41+
42+
def test_midc_format_index_tz_conversion():
    """A 'PST' time column is localized to the 'Etc/GMT+8' zone."""
    frame = pd.read_csv(midc_testfile).rename(columns={'MST': 'PST'})
    indexed = midc.format_index(frame)
    first_stamp = indexed.index[0]
    assert first_stamp.tz == pytz.timezone('Etc/GMT+8')
47+
48+
49+
def test_midc_format_index_raw():
    """format_index_raw indexes the raw sample file from the first to the
    last minute of the day in MST."""
    indexed = midc.format_index_raw(pd.read_csv(midc_raw_testfile))
    expected_first = pd.Timestamp('20181018 00:00', tz='MST')
    expected_last = pd.Timestamp('20181018 23:59', tz='MST')
    assert indexed.index[0] == expected_first
    assert indexed.index[-1] == expected_last
58+
59+
60+
def test_read_midc_var_mapping_as_arg():
    """Passing the default map explicitly renames the expected columns."""
    frame = midc.read_midc(midc_testfile, variable_map=midc.VARIABLE_MAP)
    for column in ('ghi_PSP', 'temp_air_@_2m', 'temp_air_@_50m'):
        assert column in frame.columns
65+
66+
67+
@network
def test_read_midc_raw_data_from_nrel():
    """Fetching the 'UAT' station for 2018-10-18 to 2018-10-19 returns 2880
    rows including a 'dni_Normal' column."""
    frame = midc.read_midc_raw_data_from_nrel(
        'UAT', pd.Timestamp('20181018'), pd.Timestamp('20181019'))
    assert 'dni_Normal' in frame.columns
    assert frame.index.size == 2880

0 commit comments

Comments
 (0)