From c0563d3198b2468983c4eb4f9b4678094ec9dcca Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Sun, 17 Feb 2019 18:07:30 -0800
Subject: [PATCH 1/7] pvdaq io functions

---
 pvlib/iotools/pvdaq.py | 124 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 pvlib/iotools/pvdaq.py

diff --git a/pvlib/iotools/pvdaq.py b/pvlib/iotools/pvdaq.py
new file mode 100644
index 0000000000..ffd14a5a18
--- /dev/null
+++ b/pvlib/iotools/pvdaq.py
@@ -0,0 +1,124 @@
+"""Functions to read NREL PVDAQ data
+"""
+
+from time import time
+from io import StringIO
+import sys
+from datetime import timedelta
+
+import requests
+import numpy as np
+import pandas as pd
+
+def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
+    """This fuction queries one or more years of raw PV system data from NREL's PVDAQ data service:
+            https://maps.nrel.gov/pvdaq/
+
+     This function uses the annual raw data file API, which is the most efficient way of accessing
+     multi-year, sub-hourly time series data.
+
+    Parameters
+    ----------
+    sysid: int
+        The system ID corresponding to the site that data should be queried from
+    api_key: string
+        Your API key (https://developer.nrel.gov/docs/api-key/)
+    year: int of list of ints
+        Either the year to request or the list of years to request. Multiple years will be concatenated
+        into a single data frame
+    delim: string
+        The deliminator used in the CSV file being requested
+
+    Returns
+    -------
+    label: pandas data frame
+        A data frame containing the tabular time series data from the PVDAQ service over the years
+        requested
+
+    """
+    # Force year to be a list of integers
+    ti = time()
+    try:
+        year = int(year)
+    except TypeError:
+        year = [int(yr) for yr in year]
+    else:
+        year = [year]
+    # Each year must queries separately, so iterate over the years and generate a list of dataframes.
+    df_list = []
+    it = 0
+    for yr in year:
+        progress(it, len(year), 'querying year {}'.format(year[it]))
+        req_params = {
+            'api_key': api_key,
+            'system_id': sysid,
+            'year': yr
+        }
+        base_url = 'https://developer.nrel.gov/api/pvdaq/v3/data_file?'
+        param_list = [str(item[0]) + '=' + str(item[1]) for item in req_params.items()]
+        req_url = base_url + '&'.join(param_list)
+        response = requests.get(req_url)
+        if int(response.status_code) != 200:
+            print('\n error: ', response.status_code)
+            return
+        df = pd.read_csv(StringIO(response.text), delimiter=delim)
+        df_list.append(df)
+        it += 1
+    tf = time()
+    progress(it, len(year), 'queries complete in {:.1f} seconds       '.format(tf - ti))
+    # concatenate the list of yearly data frames
+    df = pd.concat(df_list, axis=0, sort=True)
+    # convert index to timeseries
+    df = standardize_time_axis(df, datetimekey='Date-Time')
+    return df
+
+def standardize_time_axis(df, datetimekey='Date-Time'):
+    '''
+    This function takes in a pandas data frame containing tabular time series data, likely generated with a call to
+    pandas.read_csv(). It is assumed that each row of the data frame corresponds to a unique date-time, though not
+    necessarily on standard intervals. This function will attempt to convert a user-specified column containing time
+    stamps to python datetime objects, assign this column to the index of the data frame, and then standardize the
+    index over time. By standardize, we mean reconstruct the index to be at regular intervals, starting at midnight of
+    the first day of the data set. This solves a couple common data errors when working with raw data. (1) Missing data
+    points from skipped scans in the data acquisition system. (2) Time stamps that are at irregular exact times,
+    including fractional seconds.
+    :param df: A pandas data frame containing the tabular time series data
+    :param datetimekey: An optional key corresponding to the name of the column that contains the time stamps
+    :return: A new data frame with a standardized time axis
+    '''
+    # convert index to timeseries
+    try:
+        df[datetimekey] = pd.to_datetime(df[datetimekey])
+        df.set_index('Date-Time', inplace=True)
+    except KeyError:
+        time_cols = [col for col in df.columns if np.logical_or('Time' in col, 'time' in col)]
+        key = time_cols[0]
+        df[datetimekey] = pd.to_datetime(df[key])
+        df.set_index(datetimekey, inplace=True)
+    # standardize the timeseries axis to a regular frequency over a full set of days
+    diff = (df.index[1:] - df.index[:-1]).seconds
+    freq = int(np.median(diff))  # the number of seconds between each measurement
+    start = df.index[0]
+    end = df.index[-1]
+    time_index = pd.date_range(start=start.date(), end=end.date() + timedelta(days=1), freq='{}s'.format(freq))[:-1]
+    df = df.reindex(index=time_index, method='nearest')
+    return df.fillna(value=0)
+
+
+def progress(count, total, status=''):
+    """
+    Python command line progress bar in less than 10 lines of code. · GitHub
+    https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
+    :param count: the current count, int
+    :param total: the total count, int
+    :param status: a message to display
+    :return:
+    """
+    bar_len = 60
+    filled_len = int(round(bar_len * count / float(total)))
+
+    percents = round(100.0 * count / float(total), 1)
+    bar = '=' * filled_len + '-' * (bar_len - filled_len)
+
+    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
+    sys.stdout.flush()
\ No newline at end of file

From 32bda8aa09fcd94fcb3c5f38ecc1075d02ab734c Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Mon, 18 Feb 2019 10:56:18 -0800
Subject: [PATCH 2/7] fixing line length issues

---
 pvlib/iotools/pvdaq.py | 67 ++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/pvlib/iotools/pvdaq.py b/pvlib/iotools/pvdaq.py
index ffd14a5a18..f9fa5e8279 100644
--- a/pvlib/iotools/pvdaq.py
+++ b/pvlib/iotools/pvdaq.py
@@ -11,29 +11,30 @@
 import pandas as pd
 
 def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
-    """This fuction queries one or more years of raw PV system data from NREL's PVDAQ data service:
-            https://maps.nrel.gov/pvdaq/
+    """This fuction queries one or more years of raw PV system data from NREL's
+     PVDAQ data service: https://maps.nrel.gov/pvdaq/
 
-     This function uses the annual raw data file API, which is the most efficient way of accessing
-     multi-year, sub-hourly time series data.
+     This function uses the annual raw data file API, which is the most
+     efficient way of accessing multi-year, sub-hourly time series data.
 
     Parameters
     ----------
     sysid: int
-        The system ID corresponding to the site that data should be queried from
+        The system ID corresponding to the site that data should be
+        queried from
     api_key: string
         Your API key (https://developer.nrel.gov/docs/api-key/)
     year: int of list of ints
-        Either the year to request or the list of years to request. Multiple years will be concatenated
-        into a single data frame
+        Either the year to request or the list of years to request. Multiple
+        years will be concatenated into a single data frame
     delim: string
         The deliminator used in the CSV file being requested
 
     Returns
     -------
     label: pandas data frame
-        A data frame containing the tabular time series data from the PVDAQ service over the years
-        requested
+        A data frame containing the tabular time series data from the PVDAQ
+        service over the years requested
 
     """
     # Force year to be a list of integers
@@ -44,7 +45,8 @@ def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
         year = [int(yr) for yr in year]
     else:
         year = [year]
-    # Each year must queries separately, so iterate over the years and generate a list of dataframes.
+    # Each year must be queried separately, so iterate over the years and
+    # generate a list of dataframes.
     df_list = []
     it = 0
     for yr in year:
@@ -55,7 +57,8 @@ def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
             'year': yr
         }
         base_url = 'https://developer.nrel.gov/api/pvdaq/v3/data_file?'
-        param_list = [str(item[0]) + '=' + str(item[1]) for item in req_params.items()]
+        param_list = [str(item[0]) + '=' + str(item[1])
+                      for item in req_params.items()]
         req_url = base_url + '&'.join(param_list)
         response = requests.get(req_url)
         if int(response.status_code) != 200:
@@ -65,7 +68,8 @@ def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
         df_list.append(df)
         it += 1
     tf = time()
-    progress(it, len(year), 'queries complete in {:.1f} seconds       '.format(tf - ti))
+    msg = 'queries complete in {:.1f} seconds       '.format(tf - ti)
+    progress(it, len(year), msg)
     # concatenate the list of yearly data frames
     df = pd.concat(df_list, axis=0, sort=True)
     # convert index to timeseries
@@ -74,16 +78,23 @@ def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
 
 def standardize_time_axis(df, datetimekey='Date-Time'):
     '''
-    This function takes in a pandas data frame containing tabular time series data, likely generated with a call to
-    pandas.read_csv(). It is assumed that each row of the data frame corresponds to a unique date-time, though not
-    necessarily on standard intervals. This function will attempt to convert a user-specified column containing time
-    stamps to python datetime objects, assign this column to the index of the data frame, and then standardize the
-    index over time. By standardize, we mean reconstruct the index to be at regular intervals, starting at midnight of
-    the first day of the data set. This solves a couple common data errors when working with raw data. (1) Missing data
-    points from skipped scans in the data acquisition system. (2) Time stamps that are at irregular exact times,
-    including fractional seconds.
+    This function takes in a pandas data frame containing tabular time series
+    data, likely generated with a call to pandas.read_csv(). It is assumed that
+    each row of the data frame corresponds to a unique date-time, though not
+    necessarily on standard intervals. This function will attempt to convert a
+    user-specified column containing time stamps to python datetime objects,
+    assign this column to the index of the data frame, and then standardize the
+    index over time. By standardize, we mean reconstruct the index to be at
+    regular intervals, starting at midnight of the first day of the data set.
+    This solves a couple common data errors when working with raw data.
+        (1) Missing data points from skipped scans in the data acquisition
+            system.
+        (2) Time stamps that are at irregular exact times, including fractional
+            seconds.
+
     :param df: A pandas data frame containing the tabular time series data
-    :param datetimekey: An optional key corresponding to the name of the column that contains the time stamps
+    :param datetimekey: An optional key corresponding to the name of the column
+        that contains the time stamps
     :return: A new data frame with a standardized time axis
     '''
     # convert index to timeseries
@@ -91,16 +102,22 @@ def standardize_time_axis(df, datetimekey='Date-Time'):
         df[datetimekey] = pd.to_datetime(df[datetimekey])
         df.set_index('Date-Time', inplace=True)
     except KeyError:
-        time_cols = [col for col in df.columns if np.logical_or('Time' in col, 'time' in col)]
+        time_cols = [col for col in df.columns
+                     if np.logical_or('Time' in col, 'time' in col)]
         key = time_cols[0]
         df[datetimekey] = pd.to_datetime(df[key])
         df.set_index(datetimekey, inplace=True)
-    # standardize the timeseries axis to a regular frequency over a full set of days
+    # standardize the timeseries axis to a regular frequency over
+    # a full set of days
     diff = (df.index[1:] - df.index[:-1]).seconds
-    freq = int(np.median(diff))  # the number of seconds between each measurement
+    freq = int(np.median(diff))  # the number of secs between each measurement
     start = df.index[0]
     end = df.index[-1]
-    time_index = pd.date_range(start=start.date(), end=end.date() + timedelta(days=1), freq='{}s'.format(freq))[:-1]
+    time_index = pd.date_range(
+        start=start.date(),
+        end=end.date() + timedelta(days=1),
+        freq='{}s'.format(freq)
+    )[:-1]
     df = df.reindex(index=time_index, method='nearest')
     return df.fillna(value=0)
 

From a6373c872bc7f1d19ccbaa0fb0bdcdae6f562543 Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Mon, 18 Feb 2019 10:58:05 -0800
Subject: [PATCH 3/7] importing new function in package init

---
 pvlib/iotools/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
index 112cc6fbcf..9f349418c5 100644
--- a/pvlib/iotools/__init__.py
+++ b/pvlib/iotools/__init__.py
@@ -7,3 +7,4 @@
 from pvlib.iotools.midc import read_midc_raw_data_from_nrel  # noqa: F401
 from pvlib.iotools.ecmwf_macc import read_ecmwf_macc  # noqa: F401
 from pvlib.iotools.ecmwf_macc import get_ecmwf_macc  # noqa: F401
+from pvlib.iotools.pvdaq import get_pvdaq_data

From 6d8eb6696b9cf0e718a6b04f9848c190d7e428bf Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Mon, 18 Feb 2019 11:00:33 -0800
Subject: [PATCH 4/7] fixing blank lines

---
 pvlib/iotools/pvdaq.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pvlib/iotools/pvdaq.py b/pvlib/iotools/pvdaq.py
index f9fa5e8279..992a93b125 100644
--- a/pvlib/iotools/pvdaq.py
+++ b/pvlib/iotools/pvdaq.py
@@ -10,6 +10,7 @@
 import numpy as np
 import pandas as pd
 
+
 def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
     """This fuction queries one or more years of raw PV system data from NREL's
      PVDAQ data service: https://maps.nrel.gov/pvdaq/
@@ -76,6 +77,7 @@ def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
     df = standardize_time_axis(df, datetimekey='Date-Time')
     return df
 
+
 def standardize_time_axis(df, datetimekey='Date-Time'):
     '''
     This function takes in a pandas data frame containing tabular time series

From 4ce7826e74a9eadbf705781ff62226d53c8b87e6 Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Mon, 18 Feb 2019 11:02:15 -0800
Subject: [PATCH 5/7] fixing other lint issues

---
 pvlib/iotools/pvdaq.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pvlib/iotools/pvdaq.py b/pvlib/iotools/pvdaq.py
index 992a93b125..3467a1c7af 100644
--- a/pvlib/iotools/pvdaq.py
+++ b/pvlib/iotools/pvdaq.py
@@ -11,7 +11,7 @@
 import pandas as pd
 
 
-def get_pvdaq_data(sysid=2, api_key = 'DEMO_KEY', year=2011, delim=','):
+def get_pvdaq_data(sysid=2, api_key='DEMO_KEY', year=2011, delim=','):
     """This fuction queries one or more years of raw PV system data from NREL's
      PVDAQ data service: https://maps.nrel.gov/pvdaq/
 
@@ -140,4 +140,4 @@ def progress(count, total, status=''):
     bar = '=' * filled_len + '-' * (bar_len - filled_len)
 
     sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
-    sys.stdout.flush()
\ No newline at end of file
+    sys.stdout.flush()

From 60041ed22c90543c98aea4136a3627881dd52e81 Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Mon, 18 Feb 2019 17:00:50 -0800
Subject: [PATCH 6/7] Adding a newline character after last progress bar write
 so that the progress bar is not overwritten by a later print statement in a
 user script. Also added an if __name__ == "__main__" block for testing
 purposes

---
 pvlib/iotools/pvdaq.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pvlib/iotools/pvdaq.py b/pvlib/iotools/pvdaq.py
index 3467a1c7af..4161ea19b2 100644
--- a/pvlib/iotools/pvdaq.py
+++ b/pvlib/iotools/pvdaq.py
@@ -71,6 +71,7 @@ def get_pvdaq_data(sysid=2, api_key='DEMO_KEY', year=2011, delim=','):
     tf = time()
     msg = 'queries complete in {:.1f} seconds       '.format(tf - ti)
     progress(it, len(year), msg)
+    print('\n')
     # concatenate the list of yearly data frames
     df = pd.concat(df_list, axis=0, sort=True)
     # convert index to timeseries
@@ -141,3 +142,8 @@ def progress(count, total, status=''):
 
     sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
     sys.stdout.flush()
+
+
+if __name__ == "__main__":
+    df = get_pvdaq_data()
+    print(df.head())

From bebcea3eb1b6cfe48b4c21e917576202921db03e Mon Sep 17 00:00:00 2001
From: Bennet Meyers <bennetm@stanford.edu>
Date: Tue, 19 Feb 2019 18:47:06 -0800
Subject: [PATCH 7/7] allow user option for standardize_time_axis

---
 pvlib/iotools/pvdaq.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pvlib/iotools/pvdaq.py b/pvlib/iotools/pvdaq.py
index 4161ea19b2..1c1f594e8a 100644
--- a/pvlib/iotools/pvdaq.py
+++ b/pvlib/iotools/pvdaq.py
@@ -11,7 +11,8 @@
 import pandas as pd
 
 
-def get_pvdaq_data(sysid=2, api_key='DEMO_KEY', year=2011, delim=','):
+def get_pvdaq_data(sysid=2, api_key='DEMO_KEY', year=2011, delim=',',
+                   standardize=True):
     """This fuction queries one or more years of raw PV system data from NREL's
      PVDAQ data service: https://maps.nrel.gov/pvdaq/
 
@@ -74,8 +75,8 @@ def get_pvdaq_data(sysid=2, api_key='DEMO_KEY', year=2011, delim=','):
     print('\n')
     # concatenate the list of yearly data frames
     df = pd.concat(df_list, axis=0, sort=True)
-    # convert index to timeseries
-    df = standardize_time_axis(df, datetimekey='Date-Time')
+    if standardize:
+        df = standardize_time_axis(df, datetimekey='Date-Time')
     return df