Skip to content

Fix time-series scripts use time bounds properly #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions mpas_analysis/ocean/ohc_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@

from ..shared.io import NameList, StreamsFile

from ..shared.timekeeping.Date import Date

def ohc_timeseries(config):
"""
Performs analysis of ocean heat content (OHC) from time-series output.

Author: Xylar Asay-Davis, Milena Veneziani
Last Modified: 11/25/2016
Last Modified: 11/28/2016
"""

# read parameters from config file
Expand Down Expand Up @@ -50,8 +52,6 @@ def ohc_timeseries(config):
plots_dir = config.get('paths','plots_dir')

yr_offset = config.getint('time','yr_offset')
timeseries_yr1 = yr_offset + config.getint('time', 'timeseries_yr1')
timeseries_yr2 = yr_offset + config.getint('time', 'timeseries_yr2')

N_movavg = config.getint('ohc_timeseries','N_movavg')

Expand Down Expand Up @@ -90,14 +90,17 @@ def ohc_timeseries(config):

ds = remove_repeated_time_index(ds)

# convert the start and end dates to datetime objects using
# the Date class, which ensures the results are within the
# supported range
time_start = Date(startDate).to_datetime(yr_offset)
time_end = Date(endDate).to_datetime(yr_offset)
# select only the data in the specified range of years
# time_start = datetime.datetime(timeseries_yr1, 1, 1)
# time_end = datetime.datetime(timeseries_yr2, 12, 31)
# ds = ds.sel(Time=slice(time_start, time_end))
ds = ds.sel(Time=slice(time_start, time_end))

# Select year-1 data and average it (for later computing anomalies)
time_start = datetime.datetime(timeseries_yr1, 1, 1)
time_end = datetime.datetime(timeseries_yr1, 12, 31)
time_start = datetime.datetime(time_start.year, 1, 1)
time_end = datetime.datetime(time_start.year, 12, 31)
ds_yr1 = ds.sel(Time=slice(time_start,time_end))
mean_yr1 = ds_yr1.mean('Time')

Expand Down
12 changes: 11 additions & 1 deletion mpas_analysis/ocean/sst_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@

from ..shared.io import StreamsFile

from ..shared.timekeeping.Date import Date

def sst_timeseries(config):
"""
Performs analysis of the time-series output of sea-surface temperature
(SST).

Author: Xylar Asay-Davis, Milena Veneziani
Last Modified: 10/27/2016
Last Modified: 11/28/2016
"""
# Define/read in general variables
print " Load SST data..."
Expand Down Expand Up @@ -55,6 +57,14 @@ def sst_timeseries(config):
onlyvars=['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']))
ds = remove_repeated_time_index(ds)

# convert the start and end dates to datetime objects using
# the Date class, which ensures the results are within the
# supported range
time_start = Date(startDate).to_datetime(yr_offset)
time_end = Date(endDate).to_datetime(yr_offset)
# select only the data in the specified range of years
ds = ds.sel(Time=slice(time_start, time_end))

SSTregions = ds.time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature

year_start = (pd.to_datetime(ds.Time.min().values)).year
Expand Down
12 changes: 11 additions & 1 deletion mpas_analysis/sea_ice/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
from ..shared.io import StreamsFile
from ..shared.io.utility import paths

from ..shared.timekeeping.Date import Date

def seaice_timeseries(config):
"""
Performs analysis of time series of sea-ice properties.

Author: Xylar Asay-Davis, Milena Veneziani
Last Modified: 10/27/2016
Last Modified: 11/28/2016
"""

# read parameters from config file
Expand Down Expand Up @@ -79,6 +81,14 @@ def seaice_timeseries(config):
'timeSeriesStatsMonthly_avg_iceVolumeCell_1']))
ds = remove_repeated_time_index(ds)

# convert the start and end dates to datetime objects using
# the Date class, which ensures the results are within the
# supported range
time_start = Date(startDate).to_datetime(yr_offset)
time_end = Date(endDate).to_datetime(yr_offset)
# select only the data in the specified range of years
ds = ds.sel(Time=slice(time_start, time_end))

ds = ds.merge(dsmesh)

year_start = (pd.to_datetime(ds.Time.min().values)).year
Expand Down
61 changes: 52 additions & 9 deletions mpas_analysis/shared/timekeeping/Date.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import functools
import numpy
import datetime

@functools.total_ordering
class Date(object):
Expand Down Expand Up @@ -75,6 +76,50 @@ def __init__(self, dateString=None, isInterval=False, totalSeconds=None,
self.seconds = numpy.int64(seconds)
self._setTotalSeconds()

def to_datetime(self, yearOffset=0):
"""
Converts the date object to a datetime object.
The yearOffset is added to this date's year, and
the resulting date is clamped to the range supported by
numpy's datetime64[ns], used internally by xarray and
pandas

Last modified: 11/28/2016
Author: Xylar Asay-Davis
"""
if self.isInterval:
raise ValueError("self.isInterval == True. Use to_timedelta "
"instead of to_datetime")

year = numpy.maximum(datetime.MINYEAR,
numpy.minimum(datetime.MAXYEAR,
self.years+yearOffset))
outDate = datetime.datetime(year=year, month=self.months+1,
day=self.days+1, hour=self.hours,
minute=self.minutes, second=self.seconds)

minDate = datetime.datetime(year=1678, month=1, day=1,
hour=0, minute=0, second=0)
maxDate = datetime.datetime(year=2262, month=1, day=1,
hour=0, minute=0, second=0)
outDate = max(minDate, min(maxDate, outDate))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking we should issue a warning if we are on the clipping boundary so the unaware user will know there may be a potential problem.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree that this should be a warning. We typically want the end year of the time series analysis to be 9999 so we don't have to think about it. This is the function that makes sure that 9999 + 1849 still gives you a reasonable year that the analysis can handle. If you didn't want the clamping and the year offset, you wouldn't bother calling this function.

If I add a warning, it's going to be annoying that there are warnings throughout our output even though I'm intentionally using this function to clamp the date. The whole point of clamping the date here is so I don't have to do it elsewhere and so I can have better code reuse.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I see what you mean. We really aren't at a point where our simulations will go past 2262 anyway so I'm ok leaving this as-is for now. However, I'm not sure if we can necessarily plan on this for the future (in general) because I know Jeremy runs multi-century simulations. But I agree this is probably sufficient for now.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pwolfram, we don't support going past 100 years right now (because of limited numbers of files) so I think 2262 is the least of our concerns...

But also fixing Date with a new min/max allowed date will be super easy once xarray and pandas support a wider range. For now, we're at their mercy and this function is really only a side effect of that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. Thanks for the reminder about the file limitation issue. I need to take a closer look at that once I get a chance.

return outDate

def to_timedelta(self):
    """
    Converts the date object to a timedelta object.

    Only valid for interval dates.  The interval's years and months are
    folded into the day count (365 days per year, plus whatever
    ``self._monthsToDays`` returns for the months) so that the whole
    interval is represented, not just its day-of-month part.

    Returns
    -------
    datetime.timedelta
        The length of this interval.

    Raises
    ------
    ValueError
        If ``self.isInterval`` is False (use ``to_datetime`` instead).

    Last modified: 11/28/2016
    Author: Xylar Asay-Davis
    """
    if not self.isInterval:
        raise ValueError("self.isInterval == False. Use to_datetime "
                         "instead of to_timedelta")

    # Total number of days including the year and month contributions.
    # Passing self.days here instead would silently drop years and months.
    days = 365*self.years + self._monthsToDays(self.months) + self.days

    # The fields may be numpy integers (seconds is stored as numpy.int64),
    # which datetime.timedelta does not accept on every Python version,
    # so convert to built-in ints first.
    return datetime.timedelta(days=int(days), hours=int(self.hours),
                              minutes=int(self.minutes),
                              seconds=int(self.seconds))

def __lt__(self, other):
if self.isInterval != other.isInterval:
raise ValueError('Comparing interval with non-interval Date '
Expand Down Expand Up @@ -130,24 +175,22 @@ def __sub__(self, other):
return Date(isInterval=isInterval, years=years, months=months,
days=days, hours=hours, minutes=minutes, seconds=seconds)


def __str__(self):
if self.isInterval:
offset = 0
else:
offset = 1
return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}' \
.format(self.years, self.months+offset, self.days+offset,
self.hours, self.minutes, self.seconds)
return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format(
self.years, self.months+offset, self.days+offset,
self.hours, self.minutes, self.seconds)

def _diffSeconds(self, other):
return

def _setTotalSeconds(self):
days = self.years*365 + self._monthsToDays(self.months) + self.days
self.totalSeconds = (((days*24 + self.hours)*60 + self.minutes)*60
+ self.seconds)

self.totalSeconds = (((days*24 + self.hours)*60 + self.minutes)*60 +
self.seconds)

def _monthsToDays(self, months):
daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
Expand Down Expand Up @@ -182,7 +225,7 @@ def _secondsToDate(self, seconds):
def _parseDate(self, dateString):
"""
parses a dateString in one of the following formats into
a datetime object:
a Date object:
YYYY-MM-DD_hh:mm:ss
YYYY-MM-DD_hh.mm.ss
YYYY-MM-DD_SSSSS
Expand Down Expand Up @@ -213,7 +256,7 @@ def _parseDate(self, dateString):
hms = dateString

if '.' in hms:
hms = hms.replace('.',':')
hms = hms.replace('.', ':')

if '-' in ymd:
(self.years, self.months, self.days) \
Expand Down
33 changes: 33 additions & 0 deletions mpas_analysis/test/test_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import pytest
import datetime
from mpas_analysis.test import TestCase, loaddatadir
from mpas_analysis.shared.timekeeping.Date import Date

Expand Down Expand Up @@ -118,5 +119,37 @@ def test_date(self):
diff = date1-date2
self.assertEqual(diff, Date(dateString='1995-12-26', isInterval=False))

date = Date(dateString='1996-01-15', isInterval=False)
datetime1 = date.to_datetime(yearOffset=0)
datetime2 = datetime.datetime(year=1996, month=1, day=15)
self.assertEqual(datetime1, datetime2)

date = Date(dateString='0000-00-20', isInterval=True)
timedelta1 = date.to_timedelta()
timedelta2 = datetime.timedelta(days=20)
self.assertEqual(timedelta1, timedelta2)

# since pandas and xarray use the numpy type `datetime64[ns]`, which
# has a limited range of dates, the date 0001-01-01 gets increased to
# the minimum allowed year boundary, 1678-01-01 to avoid invalid
# dates.
date = Date(dateString='0001-01-01', isInterval=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put a comment above this line reminding the reader that the date is specified based on the quasi-arbitrary xarray datetime boundary.

datetime1 = date.to_datetime(yearOffset=0)
datetime2 = datetime.datetime(year=1678, month=1, day=1)
self.assertEqual(datetime1, datetime2)

date = Date(dateString='0001-01-01', isInterval=False)
datetime1 = date.to_datetime(yearOffset=1849)
datetime2 = datetime.datetime(year=1850, month=1, day=1)
self.assertEqual(datetime1, datetime2)

# since pandas and xarray use the numpy type `datetime64[ns]`, which
# has a limited range of dates, the date 9999-01-01 gets decreased to
# the maximum allowed year boundary, 2262-01-01 to avoid invalid
# dates.
date = Date(dateString='9999-01-01', isInterval=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This tests the clipping capability, right? I think we should note this here and if there is a warning thrown we should make sure it is what we expect to get.

datetime1 = date.to_datetime(yearOffset=0)
datetime2 = datetime.datetime(year=2262, month=1, day=1)
self.assertEqual(datetime1, datetime2)

# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python