diff --git a/docs/sphinx/source/whatsnew/v0.10.4.rst b/docs/sphinx/source/whatsnew/v0.10.4.rst index 3cab3fc8ad..72e402c919 100644 --- a/docs/sphinx/source/whatsnew/v0.10.4.rst +++ b/docs/sphinx/source/whatsnew/v0.10.4.rst @@ -8,7 +8,8 @@ v0.10.4 (Anticipated March, 2024) Enhancements ~~~~~~~~~~~~ * Added the Huld PV model used by PVGIS (:pull:`1940`) - +* Added metadata parsing to :py:func:`~pvlib.iotools.read_solrad` to follow the standard iotools + convention of returning a tuple of (data, meta). Previously the function only returned a dataframe. (:pull:`1968`) Bug fixes ~~~~~~~~~ diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index e5bb05d709..6d9dde743a 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -1,8 +1,8 @@ -"""Functions to read data from the NOAA SOLRAD network. -""" +"""Functions to read data from the NOAA SOLRAD network.""" -import numpy as np import pandas as pd +import requests +import io # pvlib conventions BASE_HEADERS = ( @@ -49,8 +49,15 @@ def read_solrad(filename): """ - Read NOAA SOLRAD fixed-width file into pandas dataframe. The SOLRAD - network is described in [1]_ and [2]_. + Read NOAA SOLRAD fixed-width file into pandas dataframe. + + The SOLRAD network is described in [1]_ and [2]_. + + .. versionchanged:: 0.10.4 + The function now returns a tuple where the first element is a dataframe + and the second element is a dictionary containing metadata. Previous + versions of this function only returned a dataframe. + Parameters ---------- @@ -62,6 +69,8 @@ def read_solrad(filename): data: Dataframe A dataframe with DatetimeIndex and all of the variables in the file. + metadata : dict + Metadata. Notes ----- @@ -91,19 +100,29 @@ def read_solrad(filename): widths = WIDTHS dtypes = DTYPES + meta = {} + + if str(filename).startswith('ftp') or str(filename).startswith('http'): + response = requests.get(filename) + file_buffer = io.StringIO(response.content.decode()) + else: + with open(str(filename), 'r') as file_buffer: + file_buffer = io.StringIO(file_buffer.read()) + + # The first line has the name of the station, and the second gives the + # station's latitude, longitude, elevation above mean sea level in meters, + # and the displacement in hours from local standard time. + meta['station_name'] = file_buffer.readline().strip() + + meta_line = file_buffer.readline().split() + meta['latitude'] = float(meta_line[0]) + meta['longitude'] = float(meta_line[1]) + meta['altitude'] = float(meta_line[2]) + meta['TZ'] = int(meta_line[3]) + # read in data - data = pd.read_fwf(filename, header=None, skiprows=2, names=names, - widths=widths, na_values=-9999.9) - - # loop here because dtype kwarg not supported in read_fwf until 0.20 - for (col, _dtype) in zip(data.columns, dtypes): - ser = data[col].astype(_dtype) - if _dtype == 'float64': - # older verions of pandas/numpy read '-9999.9' as - # -9999.8999999999996 and fail to set nan in read_fwf, - # so manually set nan - ser = ser.where(ser > -9999, other=np.nan) - data[col] = ser + data = pd.read_fwf(file_buffer, header=None, names=names, + widths=widths, na_values=-9999.9, dtypes=dtypes) # set index # columns do not have leading 0s, so must zfill(2) to comply @@ -114,10 +133,5 @@ def read_solrad(filename): data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] + dts['minute'], format='%Y%m%d%H%M', utc=True) data = data.set_index(dtindex) - try: - # to_datetime(utc=True) does not work in older versions of pandas - data = data.tz_localize('UTC') - except TypeError: - pass - return data + return data, meta diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index f8f97af41f..abfa5d6e31 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -5,12 +5,13 @@ import pytest from pvlib.iotools import solrad -from ..conftest import DATA_DIR, assert_frame_equal +from ..conftest import DATA_DIR, assert_frame_equal, RERUNS, RERUNS_DELAY testfile = DATA_DIR / 'abq19056.dat' testfile_mad = DATA_DIR / 'msn19056.dat' - +https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/msn/' + '2019/msn19056.dat') columns = [ 'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time', @@ -87,15 +88,32 @@ 'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64'] +meta = {'station_name': 'Albuquerque', 'latitude': 35.03796, + 'longitude': -106.62211, 'altitude': 1617, 'TZ': -7} +meta_mad = {'station_name': 'Madison', 'latitude': 43.07250, + 'longitude': -89.41133, 'altitude': 271, 'TZ': -6} -@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [ - (testfile, index, columns, values, dtypes), - (testfile_mad, index, columns_mad, values_mad, dtypes_mad) +@pytest.mark.parametrize('testfile,index,columns,values,dtypes,meta', [ + (testfile, index, columns, values, dtypes, meta), + (testfile_mad, index, columns_mad, values_mad, dtypes_mad, meta_mad) ]) -def test_read_solrad(testfile, index, columns, values, dtypes): +def test_read_solrad(testfile, index, columns, values, dtypes, meta): expected = pd.DataFrame(values, columns=columns, index=index) for (col, _dtype) in zip(expected.columns, dtypes): expected[col] = expected[col].astype(_dtype) - out = solrad.read_solrad(testfile) + out, m = solrad.read_solrad(testfile) assert_frame_equal(out, expected) + assert m == meta + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_read_solrad_https(): + # Test reading of https files. + # If this test begins failing, SOLRAD's data structure or data + # archive may have changed. + local_data, _ = solrad.read_solrad(testfile_mad) + remote_data, _ = solrad.read_solrad(https_testfile) + # local file only contains four rows to save space + assert_frame_equal(local_data, remote_data.iloc[:4])