Skip to content

Deprecate parse_psm3 and parse_cams #2458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/sphinx/source/whatsnew/v0.12.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ Breaking Changes
~~~~~~~~~~~~~~~~


Deprecations
~~~~~~~~~~~~
* The following ``parse_`` functions in :py:mod:`pvlib.iotools` are deprecated,
with the corresponding ``read_`` functions taking their place: (:issue:`2444`, :pull:`2458`)

- :py:func:`~pvlib.iotools.parse_psm3`
- :py:func:`~pvlib.iotools.parse_cams`


Bug fixes
~~~~~~~~~

Expand Down
108 changes: 37 additions & 71 deletions pvlib/iotools/psm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import requests
import pandas as pd
from json import JSONDecodeError
import warnings
from pvlib._deprecation import pvlibDeprecationWarning
from pvlib._deprecation import deprecated
from pvlib import tools

NSRDB_API_BASE = "https://developer.nrel.gov"
PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-2-2-download.csv"
Expand Down Expand Up @@ -127,7 +127,7 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
timeseries data from NREL PSM3
metadata : dict
metadata from NREL PSM3 about the record, see
:func:`pvlib.iotools.parse_psm3` for fields
:func:`pvlib.iotools.read_psm3` for fields

Raises
------
Expand All @@ -152,7 +152,7 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,

See Also
--------
pvlib.iotools.read_psm3, pvlib.iotools.parse_psm3
pvlib.iotools.read_psm3

References
----------
Expand Down Expand Up @@ -216,12 +216,12 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
# the CSV is in the response content as a UTF-8 bytestring
# to use pandas we need to create a file buffer from the response
fbuf = io.StringIO(response.content.decode('utf-8'))
return parse_psm3(fbuf, map_variables)
return read_psm3(fbuf, map_variables)


def parse_psm3(fbuf, map_variables=True):
def read_psm3(filename, map_variables=True):
"""
Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
is described in [1]_ and the SAM CSV format is described in [2]_.

.. versionchanged:: 0.9.0
Expand All @@ -231,8 +231,8 @@ def parse_psm3(fbuf, map_variables=True):

Parameters
----------
fbuf: file-like object
File-like object containing data to read.
filename: str, path-like, or buffer
Filename or in-memory buffer of a file containing data to read.
map_variables: bool, default True
When true, renames columns of the Dataframe to pvlib variable names
where applicable. See variable :const:`VARIABLE_MAP`.
Expand Down Expand Up @@ -302,12 +302,15 @@ def parse_psm3(fbuf, map_variables=True):
Examples
--------
>>> # Read a local PSM3 file:
>>> df, metadata = iotools.read_psm3("data.csv") # doctest: +SKIP

>>> # Read a file object or an in-memory buffer:
>>> with open(filename, 'r') as f: # doctest: +SKIP
... df, metadata = iotools.parse_psm3(f) # doctest: +SKIP
... df, metadata = iotools.read_psm3(f) # doctest: +SKIP

See Also
--------
pvlib.iotools.read_psm3, pvlib.iotools.get_psm3
pvlib.iotools.get_psm3

References
----------
Expand All @@ -316,34 +319,35 @@ def parse_psm3(fbuf, map_variables=True):
.. [2] `Standard Time Series Data File Format
<https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
"""
# The first 2 lines of the response are headers with metadata
metadata_fields = fbuf.readline().split(',')
metadata_fields[-1] = metadata_fields[-1].strip() # strip trailing newline
metadata_values = fbuf.readline().split(',')
metadata_values[-1] = metadata_values[-1].strip() # strip trailing newline
with tools._file_context_manager(filename) as fbuf:
# The first 2 lines of the response are headers with metadata
metadata_fields = fbuf.readline().split(',')
metadata_values = fbuf.readline().split(',')
# get the column names so we can set the dtypes
columns = fbuf.readline().split(',')
columns[-1] = columns[-1].strip() # strip trailing newline
# Since the header has so many columns, excel saves blank cols in the
# data below the header lines.
columns = [col for col in columns if col != '']
dtypes = dict.fromkeys(columns, float) # all floats except datevec
dtypes.update({'Year': int, 'Month': int, 'Day': int, 'Hour': int,
'Minute': int, 'Cloud Type': int, 'Fill Flag': int})
data = pd.read_csv(
fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
delimiter=',', lineterminator='\n') # skip carriage returns \r

metadata_fields[-1] = metadata_fields[-1].strip() # trailing newline
metadata_values[-1] = metadata_values[-1].strip() # trailing newline
metadata = dict(zip(metadata_fields, metadata_values))
# the response is all strings, so set some metadata types to numbers
metadata['Local Time Zone'] = int(metadata['Local Time Zone'])
metadata['Time Zone'] = int(metadata['Time Zone'])
metadata['Latitude'] = float(metadata['Latitude'])
metadata['Longitude'] = float(metadata['Longitude'])
metadata['Elevation'] = int(metadata['Elevation'])
# get the column names so we can set the dtypes
columns = fbuf.readline().split(',')
columns[-1] = columns[-1].strip() # strip trailing newline
# Since the header has so many columns, excel saves blank cols in the
# data below the header lines.
columns = [col for col in columns if col != '']
dtypes = dict.fromkeys(columns, float) # all floats except datevec
dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int)
dtypes['Cloud Type'] = int
dtypes['Fill Flag'] = int
data = pd.read_csv(
fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
delimiter=',', lineterminator='\n') # skip carriage returns \r

# the response 1st 5 columns are a date vector, convert to datetime
dtidx = pd.to_datetime(
data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
dtidx = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
# in USA all timezones are integers
tz = 'Etc/GMT%+d' % -metadata['Time Zone']
data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)
Expand All @@ -357,43 +361,5 @@ def parse_psm3(fbuf, map_variables=True):
return data, metadata


def read_psm3(filename, map_variables=True):
"""
Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
is described in [1]_ and the SAM CSV format is described in [2]_.

.. versionchanged:: 0.9.0
The function now returns a tuple where the first element is a dataframe
and the second element is a dictionary containing metadata. Previous
versions of this function had the return values switched.

Parameters
----------
filename: str
Filename of a file containing data to read.
map_variables: bool, default True
When true, renames columns of the Dataframe to pvlib variable names
where applicable. See variable :const:`VARIABLE_MAP`.

Returns
-------
data : pandas.DataFrame
timeseries data from NREL PSM3
metadata : dict
metadata from NREL PSM3 about the record, see
:func:`pvlib.iotools.parse_psm3` for fields

See Also
--------
pvlib.iotools.parse_psm3, pvlib.iotools.get_psm3

References
----------
.. [1] `NREL National Solar Radiation Database (NSRDB)
<https://nsrdb.nrel.gov/>`_
.. [2] `Standard Time Series Data File Format
<https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
"""
with open(str(filename), 'r') as fbuf:
content = parse_psm3(fbuf, map_variables)
return content
# Deprecated alias: ``parse_psm3`` now forwards to ``read_psm3`` (which also
# accepts file-like buffers) and emits a deprecation warning when called.
parse_psm3 = deprecated(since="0.12.1", name="parse_psm3",
                        alternative="read_psm3")(read_psm3)
92 changes: 32 additions & 60 deletions pvlib/iotools/sodapro.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import requests
import io
import warnings
from pvlib import tools

from pvlib._deprecation import deprecated

URL = 'api.soda-solardata.com'

Expand Down Expand Up @@ -145,7 +147,7 @@ def get_cams(latitude, longitude, start, end, email, identifier='mcclear',

See Also
--------
pvlib.iotools.read_cams, pvlib.iotools.parse_cams
pvlib.iotools.read_cams

Raises
------
Expand Down Expand Up @@ -231,20 +233,22 @@ def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
# Successful requests returns a csv data file
else:
fbuf = io.StringIO(res.content.decode('utf-8'))
data, metadata = parse_cams(fbuf, integrated=integrated, label=label,
map_variables=map_variables)
data, metadata = read_cams(fbuf, integrated=integrated, label=label,
map_variables=map_variables)
return data, metadata


def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
def read_cams(filename, integrated=False, label=None, map_variables=True):
"""
Parse a file-like buffer with data in the format of a CAMS Radiation or
McClear file. The CAMS solar radiation services are described in [1]_.
Read a file or file-like buffer with data in the format of a CAMS
Radiation or McClear file.

The CAMS solar radiation services are described in [1]_.

Parameters
----------
fbuf: file-like object
File-like object containing data to read.
filename: str, path-like, or buffer
Filename or in-memory buffer of a file containing data to read.
integrated: boolean, default False
Whether to return radiation parameters as integrated values (Wh/m^2)
or as average irradiance values (W/m^2) (pvlib preferred units)
Expand All @@ -264,23 +268,31 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True):

See Also
--------
pvlib.iotools.read_cams, pvlib.iotools.get_cams
pvlib.iotools.get_cams

References
----------
.. [1] `CAMS solar radiation documentation
<https://atmosphere.copernicus.eu/solar-radiation>`_
"""
metadata = {}
# Initial lines starting with # contain metadata
while True:
line = fbuf.readline().rstrip('\n')
if line.startswith('# Observation period'):
# The last line of the metadata section contains the column names
names = line.lstrip('# ').split(';')
break # End of metadata section has been reached
elif ': ' in line:
metadata[line.split(': ')[0].lstrip('# ')] = line.split(': ')[1]

with tools._file_context_manager(filename) as fbuf:

# Initial lines starting with # contain metadata
while True:
line = fbuf.readline().rstrip('\n')
if line.startswith('# Observation period'):
# The last line of the metadata section has the column names
names = line.lstrip('# ').split(';')
break # End of metadata section has been reached
elif ': ' in line:
key = line.split(': ')[0].lstrip('# ')
value = line.split(': ')[1]
metadata[key] = value

data = pd.read_csv(fbuf, sep=';', comment='#', header=None,
names=names)

# Convert latitude, longitude, and altitude values from strings to floats
for k_old in list(metadata.keys()):
Expand All @@ -296,8 +308,6 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
metadata['Summarization (integration) period']]
metadata['time_step'] = time_step

data = pd.read_csv(fbuf, sep=';', comment='#', header=None, names=names)

obs_period = data['Observation period'].str.split('/')

# Set index as the start observation time (left) and localize to UTC
Expand Down Expand Up @@ -336,43 +346,5 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
return data, metadata


def read_cams(filename, integrated=False, label=None, map_variables=True):
"""
Read a CAMS Radiation or McClear file into a pandas DataFrame.

CAMS Radiation and McClear are described in [1]_.

Parameters
----------
filename: str
Filename of a file containing data to read.
integrated: boolean, default False
Whether to return radiation parameters as integrated values (Wh/m^2)
or as average irradiance values (W/m^2) (pvlib preferred units)
label : {'right', 'left'}, optional
Which bin edge label to label time-step with. The default is 'left' for
all time steps except for '1M' which has a default of 'right'.
map_variables: bool, default: True
When true, renames columns of the Dataframe to pvlib variable names
where applicable. See variable :const:`VARIABLE_MAP`.

Returns
-------
data: pandas.DataFrame
Timeseries data from CAMS Radiation or McClear.
See :func:`pvlib.iotools.get_cams` for fields.
metadata: dict
Metadata available in the file.

See Also
--------
pvlib.iotools.parse_cams, pvlib.iotools.get_cams

References
----------
.. [1] `CAMS solar radiation documentation
<https://atmosphere.copernicus.eu/solar-radiation>`_
"""
with open(str(filename), 'r') as fbuf:
content = parse_cams(fbuf, integrated, label, map_variables)
return content
# Deprecated alias: ``parse_cams`` now forwards to ``read_cams`` (which also
# accepts file-like buffers) and emits a deprecation warning when called.
parse_cams = deprecated(since="0.12.1", name="parse_cams",
                        alternative="read_cams")(read_cams)
13 changes: 11 additions & 2 deletions tests/iotools/test_psm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from requests import HTTPError
from io import StringIO

from pvlib._deprecation import pvlibDeprecationWarning


TMY_TEST_DATA = TESTS_DATA_DIR / 'test_psm3_tmy-2017.csv'
YEAR_TEST_DATA = TESTS_DATA_DIR / 'test_psm3_2017.csv'
Expand Down Expand Up @@ -130,7 +132,7 @@ def test_get_psm3_tmy_errors(

@pytest.fixture
def io_input(request):
"""file-like object for parse_psm3"""
"""file-like object for read_psm3"""
with MANUAL_TEST_DATA.open() as f:
data = f.read()
obj = StringIO(data)
Expand All @@ -139,7 +141,8 @@ def io_input(request):

def test_parse_psm3(io_input):
"""test parse_psm3"""
data, metadata = psm3.parse_psm3(io_input, map_variables=False)
with pytest.warns(pvlibDeprecationWarning, match='Use read_psm3 instead'):
data, metadata = psm3.parse_psm3(io_input, map_variables=False)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)

Expand All @@ -151,6 +154,12 @@ def test_read_psm3():
assert_psm3_equal(data, metadata, expected)


def test_read_psm3_buffer(io_input):
    """test read_psm3 with an in-memory file-like buffer"""
    data, metadata = psm3.read_psm3(io_input, map_variables=False)
    expected = pd.read_csv(YEAR_TEST_DATA)
    assert_psm3_equal(data, metadata, expected)


def test_read_psm3_map_variables():
"""test read_psm3 map_variables=True"""
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True)
Expand Down
Loading
Loading