diff --git a/docs/sphinx/source/whatsnew/v0.12.1.rst b/docs/sphinx/source/whatsnew/v0.12.1.rst index feac3c3a05..223c037f75 100644 --- a/docs/sphinx/source/whatsnew/v0.12.1.rst +++ b/docs/sphinx/source/whatsnew/v0.12.1.rst @@ -8,6 +8,15 @@ Breaking Changes ~~~~~~~~~~~~~~~~ +Deprecations +~~~~~~~~~~~~ +* The following ``parse_`` functions in :py:mod:`pvlib.iotools` are deprecated, + with the corresponding ``read_`` functions taking their place: (:issue:`2444`, :pull:`2458`) + + - :py:func:`~pvlib.iotools.parse_psm3` + - :py:func:`~pvlib.iotools.parse_cams` + + Bug fixes ~~~~~~~~~ * :py:func:`pvlib.iotools.get_pvgis_tmy` now returns the correct dtypes when diff --git a/pvlib/iotools/psm3.py b/pvlib/iotools/psm3.py index 92ebbeb734..34eb35ca33 100644 --- a/pvlib/iotools/psm3.py +++ b/pvlib/iotools/psm3.py @@ -7,8 +7,8 @@ import requests import pandas as pd from json import JSONDecodeError -import warnings -from pvlib._deprecation import pvlibDeprecationWarning +from pvlib._deprecation import deprecated +from pvlib import tools NSRDB_API_BASE = "https://developer.nrel.gov" PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-2-2-download.csv" @@ -127,7 +127,7 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, timeseries data from NREL PSM3 metadata : dict metadata from NREL PSM3 about the record, see - :func:`pvlib.iotools.parse_psm3` for fields + :func:`pvlib.iotools.read_psm3` for fields Raises ------ @@ -152,7 +152,7 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, See Also -------- - pvlib.iotools.read_psm3, pvlib.iotools.parse_psm3 + pvlib.iotools.read_psm3 References ---------- @@ -216,12 +216,12 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, # the CSV is in the response content as a UTF-8 bytestring # to use pandas we need to create a file buffer from the response fbuf = io.StringIO(response.content.decode('utf-8')) - return parse_psm3(fbuf, map_variables) + return read_psm3(fbuf, map_variables) -def parse_psm3(fbuf, map_variables=True): +def read_psm3(filename, map_variables=True): """ - Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB + Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_. .. versionchanged:: 0.9.0 @@ -231,8 +231,8 @@ def parse_psm3(fbuf, map_variables=True): Parameters ---------- - fbuf: file-like object - File-like object containing data to read. + filename: str, path-like, or buffer + Filename or in-memory buffer of a file containing data to read. map_variables: bool, default True When true, renames columns of the Dataframe to pvlib variable names where applicable. See variable :const:`VARIABLE_MAP`. @@ -302,12 +302,15 @@ def parse_psm3(fbuf, map_variables=True): Examples -------- >>> # Read a local PSM3 file: + >>> df, metadata = iotools.read_psm3("data.csv") # doctest: +SKIP + + >>> # Read a file object or an in-memory buffer: >>> with open(filename, 'r') as f: # doctest: +SKIP - ... df, metadata = iotools.parse_psm3(f) # doctest: +SKIP + ... df, metadata = iotools.read_psm3(f) # doctest: +SKIP See Also -------- - pvlib.iotools.read_psm3, pvlib.iotools.get_psm3 + pvlib.iotools.get_psm3 References ---------- @@ -316,11 +319,25 @@ def parse_psm3(fbuf, map_variables=True): .. [2] `Standard Time Series Data File Format `_ """ - # The first 2 lines of the response are headers with metadata - metadata_fields = fbuf.readline().split(',') - metadata_fields[-1] = metadata_fields[-1].strip() # strip trailing newline - metadata_values = fbuf.readline().split(',') - metadata_values[-1] = metadata_values[-1].strip() # strip trailing newline + with tools._file_context_manager(filename) as fbuf: + # The first 2 lines of the response are headers with metadata + metadata_fields = fbuf.readline().split(',') + metadata_values = fbuf.readline().split(',') + # get the column names so we can set the dtypes + columns = fbuf.readline().split(',') + columns[-1] = columns[-1].strip() # strip trailing newline + # Since the header has so many columns, excel saves blank cols in the + # data below the header lines. + columns = [col for col in columns if col != ''] + dtypes = dict.fromkeys(columns, float) # all floats except datevec + dtypes.update({'Year': int, 'Month': int, 'Day': int, 'Hour': int, + 'Minute': int, 'Cloud Type': int, 'Fill Flag': int}) + data = pd.read_csv( + fbuf, header=None, names=columns, usecols=columns, dtype=dtypes, + delimiter=',', lineterminator='\n') # skip carriage returns \r + + metadata_fields[-1] = metadata_fields[-1].strip() # trailing newline + metadata_values[-1] = metadata_values[-1].strip() # trailing newline metadata = dict(zip(metadata_fields, metadata_values)) # the response is all strings, so set some metadata types to numbers metadata['Local Time Zone'] = int(metadata['Local Time Zone']) @@ -328,22 +345,9 @@ def parse_psm3(fbuf, map_variables=True): metadata['Latitude'] = float(metadata['Latitude']) metadata['Longitude'] = float(metadata['Longitude']) metadata['Elevation'] = int(metadata['Elevation']) - # get the column names so we can set the dtypes - columns = fbuf.readline().split(',') - columns[-1] = columns[-1].strip() # strip trailing newline - # Since the header has so many columns, excel saves blank cols in the - # data below the header lines. - columns = [col for col in columns if col != ''] - dtypes = dict.fromkeys(columns, float) # all floats except datevec - dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int) - dtypes['Cloud Type'] = int - dtypes['Fill Flag'] = int - data = pd.read_csv( - fbuf, header=None, names=columns, usecols=columns, dtype=dtypes, - delimiter=',', lineterminator='\n') # skip carriage returns \r + # the response 1st 5 columns are a date vector, convert to datetime - dtidx = pd.to_datetime( - data[['Year', 'Month', 'Day', 'Hour', 'Minute']]) + dtidx = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']]) # in USA all timezones are integers tz = 'Etc/GMT%+d' % -metadata['Time Zone'] data.index = pd.DatetimeIndex(dtidx).tz_localize(tz) @@ -357,43 +361,5 @@ def parse_psm3(fbuf, map_variables=True): return data, metadata -def read_psm3(filename, map_variables=True): - """ - Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB - is described in [1]_ and the SAM CSV format is described in [2]_. - - .. versionchanged:: 0.9.0 - The function now returns a tuple where the first element is a dataframe - and the second element is a dictionary containing metadata. Previous - versions of this function had the return values switched. - - Parameters - ---------- - filename: str - Filename of a file containing data to read. - map_variables: bool, default True - When true, renames columns of the Dataframe to pvlib variable names - where applicable. See variable :const:`VARIABLE_MAP`. - - Returns - ------- - data : pandas.DataFrame - timeseries data from NREL PSM3 - metadata : dict - metadata from NREL PSM3 about the record, see - :func:`pvlib.iotools.parse_psm3` for fields - - See Also - -------- - pvlib.iotools.parse_psm3, pvlib.iotools.get_psm3 - - References - ---------- - .. [1] `NREL National Solar Radiation Database (NSRDB) - `_ - .. [2] `Standard Time Series Data File Format - `_ - """ - with open(str(filename), 'r') as fbuf: - content = parse_psm3(fbuf, map_variables) - return content +parse_psm3 = deprecated(since="0.12.1", name="parse_psm3", + alternative="read_psm3")(read_psm3) diff --git a/pvlib/iotools/sodapro.py b/pvlib/iotools/sodapro.py index 292c8b477a..524db942ac 100644 --- a/pvlib/iotools/sodapro.py +++ b/pvlib/iotools/sodapro.py @@ -7,7 +7,9 @@ import requests import io import warnings +from pvlib import tools +from pvlib._deprecation import deprecated URL = 'api.soda-solardata.com' @@ -151,7 +153,7 @@ def get_cams(latitude, longitude, start, end, email, identifier='mcclear', See Also -------- - pvlib.iotools.read_cams, pvlib.iotools.parse_cams + pvlib.iotools.read_cams Raises ------ @@ -239,20 +241,22 @@ def get_cams(latitude, longitude, start, end, email, identifier='mcclear', # Successful requests returns a csv data file else: fbuf = io.StringIO(res.content.decode('utf-8')) - data, metadata = parse_cams(fbuf, integrated=integrated, label=label, - map_variables=map_variables) + data, metadata = read_cams(fbuf, integrated=integrated, label=label, + map_variables=map_variables) return data, metadata -def parse_cams(fbuf, integrated=False, label=None, map_variables=True): +def read_cams(filename, integrated=False, label=None, map_variables=True): """ - Parse a file-like buffer with data in the format of a CAMS Radiation or - McClear file. The CAMS solar radiation services are described in [1]_. + Read a file or file-like buffer with data in the format of a CAMS + Radiation or McClear file. + + The CAMS solar radiation services are described in [1]_. Parameters ---------- - fbuf: file-like object - File-like object containing data to read. + filename: str, path-like, or buffer + Filename or in-memory buffer of a file containing data to read. integrated: boolean, default False Whether to return radiation parameters as integrated values (Wh/m^2) or as average irradiance values (W/m^2) (pvlib preferred units) @@ -272,7 +276,7 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True): See Also -------- - pvlib.iotools.read_cams, pvlib.iotools.get_cams + pvlib.iotools.get_cams References ---------- @@ -280,15 +284,23 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True): `_ """ metadata = {} - # Initial lines starting with # contain metadata - while True: - line = fbuf.readline().rstrip('\n') - if line.startswith('# Observation period'): - # The last line of the metadata section contains the column names - names = line.lstrip('# ').split(';') - break # End of metadata section has been reached - elif ': ' in line: - metadata[line.split(': ')[0].lstrip('# ')] = line.split(': ')[1] + + with tools._file_context_manager(filename) as fbuf: + + # Initial lines starting with # contain metadata + while True: + line = fbuf.readline().rstrip('\n') + if line.startswith('# Observation period'): + # The last line of the metadata section has the column names + names = line.lstrip('# ').split(';') + break # End of metadata section has been reached + elif ': ' in line: + key = line.split(': ')[0].lstrip('# ') + value = line.split(': ')[1] + metadata[key] = value + + data = pd.read_csv(fbuf, sep=';', comment='#', header=None, + names=names) # Convert latitude, longitude, and altitude values from strings to floats for k_old in list(metadata.keys()): @@ -304,8 +316,6 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True): metadata['Summarization (integration) period']] metadata['time_step'] = time_step - data = pd.read_csv(fbuf, sep=';', comment='#', header=None, names=names) - obs_period = data['Observation period'].str.split('/') # Set index as the start observation time (left) and localize to UTC @@ -344,43 +354,5 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True): return data, metadata -def read_cams(filename, integrated=False, label=None, map_variables=True): - """ - Read a CAMS Radiation or McClear file into a pandas DataFrame. - - CAMS Radiation and McClear are described in [1]_. - - Parameters - ---------- - filename: str - Filename of a file containing data to read. - integrated: boolean, default False - Whether to return radiation parameters as integrated values (Wh/m^2) - or as average irradiance values (W/m^2) (pvlib preferred units) - label : {'right', 'left}, optional - Which bin edge label to label time-step with. The default is 'left' for - all time steps except for '1M' which has a default of 'right'. - map_variables: bool, default: True - When true, renames columns of the Dataframe to pvlib variable names - where applicable. See variable :const:`VARIABLE_MAP`. - - Returns - ------- - data: pandas.DataFrame - Timeseries data from CAMS Radiation or McClear. - See :func:`pvlib.iotools.get_cams` for fields. - metadata: dict - Metadata available in the file. - - See Also - -------- - pvlib.iotools.parse_cams, pvlib.iotools.get_cams - - References - ---------- - .. [1] `CAMS solar radiation time-series documentation. Climate Data Store. - `_ - """ - with open(str(filename), 'r') as fbuf: - content = parse_cams(fbuf, integrated, label, map_variables) - return content +parse_cams = deprecated(since="0.12.1", name="parse_cams", + alternative="read_cams")(read_cams) diff --git a/tests/iotools/test_psm3.py b/tests/iotools/test_psm3.py index 7a771ff207..39de06d234 100644 --- a/tests/iotools/test_psm3.py +++ b/tests/iotools/test_psm3.py @@ -16,6 +16,8 @@ from requests import HTTPError from io import StringIO +from pvlib._deprecation import pvlibDeprecationWarning + TMY_TEST_DATA = TESTS_DATA_DIR / 'test_psm3_tmy-2017.csv' YEAR_TEST_DATA = TESTS_DATA_DIR / 'test_psm3_2017.csv' @@ -130,7 +132,7 @@ def test_get_psm3_tmy_errors( @pytest.fixture def io_input(request): - """file-like object for parse_psm3""" + """file-like object for read_psm3""" with MANUAL_TEST_DATA.open() as f: data = f.read() obj = StringIO(data) @@ -139,7 +141,8 @@ def io_input(request): def test_parse_psm3(io_input): """test parse_psm3""" - data, metadata = psm3.parse_psm3(io_input, map_variables=False) + with pytest.warns(pvlibDeprecationWarning, match='Use read_psm3 instead'): + data, metadata = psm3.parse_psm3(io_input, map_variables=False) expected = pd.read_csv(YEAR_TEST_DATA) assert_psm3_equal(data, metadata, expected) @@ -151,6 +154,12 @@ def test_read_psm3(): assert_psm3_equal(data, metadata, expected) +def test_read_psm3_buffer(io_input): + data, metadata = psm3.read_psm3(io_input, map_variables=False) + expected = pd.read_csv(YEAR_TEST_DATA) + assert_psm3_equal(data, metadata, expected) + + def test_read_psm3_map_variables(): """test read_psm3 map_variables=True""" data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True) diff --git a/tests/iotools/test_sodapro.py b/tests/iotools/test_sodapro.py index 93e3c39cff..9bda1d8f16 100644 --- a/tests/iotools/test_sodapro.py +++ b/tests/iotools/test_sodapro.py @@ -11,6 +11,8 @@ from tests.conftest import TESTS_DATA_DIR, assert_frame_equal +from pvlib._deprecation import pvlibDeprecationWarning + testfile_mcclear_verbose = TESTS_DATA_DIR / 'cams_mcclear_1min_verbose.csv' testfile_mcclear_monthly = TESTS_DATA_DIR / 'cams_mcclear_monthly.csv' testfile_radiation_verbose = TESTS_DATA_DIR / 'cams_radiation_1min_verbose.csv' @@ -144,7 +146,6 @@ 0.9897]]) -# @pytest.fixture def generate_expected_dataframe(values, columns, index, dtypes): """Create dataframe from arrays of values, columns and index, in order to use this dataframe to compare to. @@ -185,6 +186,12 @@ def test_read_cams_integrated_unmapped_label(): assert_frame_equal(out, expected, check_less_precise=True) +def test_parse_cams_deprecated(): + with pytest.warns(pvlibDeprecationWarning, match='Use read_cams instead'): + with open(testfile_radiation_verbose, mode="r") as fbuf: + _ = sodapro.parse_cams(fbuf) + + def test_read_cams_metadata(): _, metadata = sodapro.read_cams(testfile_mcclear_monthly, integrated=False) assert metadata['Time reference'] == 'Universal time (UT)' @@ -203,7 +210,7 @@ def test_read_cams_metadata(): values_radiation_monthly, dtypes_radiation, 'cams_radiation')]) def test_get_cams(requests_mock, testfile, index, columns, values, dtypes, identifier): - """Test that get_cams generates the correct URI request and that parse_cams + """Test that get_cams generates the correct URI request and that read_cams is being called correctly""" # Open local test file containing McClear mothly data with open(testfile, 'r') as test_file: