Skip to content

Commit ad28184

Browse files
committed
Update date type handling in mpas_xarray
This merge changes the way that mpas_xarray converts MPAS data arrays of various types to dates used for xarray data sets. At least one (and potentially 2) MPAS array names are passed to mpas_preprocess. mpas_xarray will convert these arrays to dates, using the data type to determine the type of data (date string, or days since simulation start) to be converted. The 5 analysis scripts have been updated to use the new syntax. timeseries_y1 and timeseries_yr2 have been added to the OHC script but general values (other than 1 and 9999, respectively) are not yet supported.
1 parent a09ad3a commit ad28184

File tree

6 files changed

+116
-94
lines changed

6 files changed

+116
-94
lines changed

mpas_analysis/ocean/ocean_modelvsobs.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,10 @@ def ocn_modelvsobs(config, field):
6363
if field.lower() == 'mld':
6464
obs_filename = "%s/holtetalley_mld_climatology.nc" % obsdir
6565

66-
ds = xr.open_mfdataset(infiles, preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset,
67-
timeSeriesStats=True,
68-
timestr='time_avg_daysSinceStartOfSim',
69-
onlyvars=['time_avg_dThreshMLD']))
66+
ds = xr.open_mfdataset(infiles, preprocess=lambda x:
67+
preprocess_mpas(x, yearoffset=yr_offset,
68+
timestr=['xtime_start', 'xtime_end'],
69+
onlyvars=['time_avg_dThreshMLD']))
7070
ds = remove_repeated_time_index(ds)
7171
ds.rename({'time_avg_dThreshMLD':'mpasData'}, inplace = True)
7272

@@ -93,11 +93,11 @@ def ocn_modelvsobs(config, field):
9393

9494
elif field.lower() == 'sst':
9595

96-
ds = xr.open_mfdataset(infiles, preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset,
97-
timeSeriesStats=True,
98-
timestr='time_avg_daysSinceStartOfSim',
99-
onlyvars=['time_avg_activeTracers_temperature'],
100-
selvals={'nVertLevels':1}))
96+
ds = xr.open_mfdataset(infiles, preprocess=lambda x:
97+
preprocess_mpas(x, yearoffset=yr_offset,
98+
timestr=['xtime_start', 'xtime_end'],
99+
onlyvars=['time_avg_activeTracers_temperature'],
100+
selvals={'nVertLevels':1}))
101101
ds = remove_repeated_time_index(ds)
102102
ds.rename({'time_avg_activeTracers_temperature':'mpasData'}, inplace = True)
103103

mpas_analysis/ocean/ohc_timeseries.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def ohc_timeseries(config):
1616
Performs analysis of ocean heat content (OHC) from time-series output.
1717
1818
Author: Xylar Asay-Davis, Milena Veneziani
19-
Last Modified: 10/26/2016
19+
Last Modified: 11/25/2016
2020
"""
2121

2222
# read parameters from config file
@@ -50,6 +50,8 @@ def ohc_timeseries(config):
5050
plots_dir = config.get('paths','plots_dir')
5151

5252
yr_offset = config.getint('time','yr_offset')
53+
timeseries_yr1 = yr_offset + config.getint('time', 'timeseries_yr1')
54+
timeseries_yr2 = yr_offset + config.getint('time', 'timeseries_yr2')
5355

5456
N_movavg = config.getint('ohc_timeseries','N_movavg')
5557

@@ -75,20 +77,27 @@ def ohc_timeseries(config):
7577

7678
# Load data
7779
print " Load ocean data..."
78-
ds = xr.open_mfdataset(infiles, preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset,
79-
timeSeriesStats=True,
80-
timestr='time_avg_daysSinceStartOfSim',
81-
onlyvars=['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
82-
'time_avg_avgValueWithinOceanLayerRegion_sumLayerMaskValue',
83-
'time_avg_avgValueWithinOceanLayerRegion_avgLayerArea',
84-
'time_avg_avgValueWithinOceanLayerRegion_avgLayerThickness']))
85-
80+
varList = ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
81+
'time_avg_avgValueWithinOceanLayerRegion_sumLayerMaskValue',
82+
'time_avg_avgValueWithinOceanLayerRegion_avgLayerArea',
83+
'time_avg_avgValueWithinOceanLayerRegion_avgLayerThickness']
84+
ds = xr.open_mfdataset(infiles,
85+
preprocess=lambda x:
86+
preprocess_mpas(x,
87+
yearoffset=yr_offset,
88+
timestr=['xtime_start','xtime_end'],
89+
onlyvars=varList))
8690

8791
ds = remove_repeated_time_index(ds)
8892

93+
# select only the data in the specified range of years
94+
# time_start = datetime.datetime(timeseries_yr1, 1, 1)
95+
# time_end = datetime.datetime(timeseries_yr2, 12, 31)
96+
# ds = ds.sel(Time=slice(time_start, time_end))
97+
8998
# Select year-1 data and average it (for later computing anomalies)
90-
time_start = datetime.datetime(yr_offset+1,1,1)
91-
time_end = datetime.datetime(yr_offset+1,12,31)
99+
time_start = datetime.datetime(timeseries_yr1, 1, 1)
100+
time_end = datetime.datetime(timeseries_yr1, 12, 31)
92101
ds_yr1 = ds.sel(Time=slice(time_start,time_end))
93102
mean_yr1 = ds_yr1.mean('Time')
94103

@@ -107,7 +116,8 @@ def ohc_timeseries(config):
107116
if ref_casename_v0 != "None":
108117
print " Load in OHC for ACMEv0 case..."
109118
infiles_v0data = "".join([indir_v0data,'/OHC.',ref_casename_v0,'.year*.nc'])
110-
ds_v0 = xr.open_mfdataset(infiles_v0data,preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
119+
ds_v0 = xr.open_mfdataset(infiles_v0data, preprocess=lambda x:
120+
preprocess_mpas(x, yearoffset=yr_offset))
111121
ds_v0 = remove_repeated_time_index(ds_v0)
112122
ds_v0_tslice = ds_v0.sel(Time=slice(time_start,time_end))
113123

mpas_analysis/ocean/sst_timeseries.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,10 @@ def sst_timeseries(config):
4949
iregions = config.getlist('sst_timeseries','regionIndicesToPlot',listType=int)
5050

5151
# Load data:
52-
ds = xr.open_mfdataset(infiles, preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset,
53-
timeSeriesStats=True, timestr='time_avg_daysSinceStartOfSim',
54-
onlyvars=['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']))
52+
ds = xr.open_mfdataset(infiles, preprocess=lambda x:
53+
preprocess_mpas(x, yearoffset=yr_offset,
54+
timestr=['xtime_start', 'xtime_end'],
55+
onlyvars=['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']))
5556
ds = remove_repeated_time_index(ds)
5657

5758
SSTregions = ds.time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature
@@ -65,7 +66,8 @@ def sst_timeseries(config):
6566
if ref_casename_v0 != "None":
6667
print " Load in SST for ACMEv0 case..."
6768
infiles_v0data = "".join([indir_v0data,'/SST.',ref_casename_v0,'.year*.nc'])
68-
ds_v0 = xr.open_mfdataset(infiles_v0data,preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
69+
ds_v0 = xr.open_mfdataset(infiles_v0data,preprocess=lambda x:
70+
preprocess_mpas(x, yearoffset=yr_offset))
6971
ds_v0 = remove_repeated_time_index(ds_v0)
7072
ds_v0_tslice = ds_v0.sel(Time=slice(time_start,time_end))
7173

mpas_analysis/sea_ice/modelvsobs.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@ def seaice_modelvsobs(config):
112112

113113
# Load data
114114
print " Load sea-ice data..."
115-
ds = xr.open_mfdataset(infiles, preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset,
116-
timeSeriesStats=True,
117-
timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1',
118-
onlyvars=['timeSeriesStatsMonthly_avg_iceAreaCell_1',
119-
'timeSeriesStatsMonthly_avg_iceVolumeCell_1']))
115+
ds = xr.open_mfdataset(infiles, preprocess=lambda x:
116+
preprocess_mpas(x, yearoffset=yr_offset,
117+
timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1',
118+
onlyvars=['timeSeriesStatsMonthly_avg_iceAreaCell_1',
119+
'timeSeriesStatsMonthly_avg_iceVolumeCell_1']))
120120
ds = remove_repeated_time_index(ds)
121121

122122
# Compute climatologies (first motnhly and then seasonally)

mpas_analysis/sea_ice/timeseries.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ def seaice_timeseries(config):
7272
dsmesh = xr.open_dataset(meshfile)
7373

7474
# Load data
75-
ds = xr.open_mfdataset(infiles, preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset,
76-
timeSeriesStats=True,
77-
timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1',
78-
onlyvars=['timeSeriesStatsMonthly_avg_iceAreaCell_1',
79-
'timeSeriesStatsMonthly_avg_iceVolumeCell_1']))
75+
ds = xr.open_mfdataset(infiles, preprocess=lambda x:
76+
preprocess_mpas(x, yearoffset=yr_offset,
77+
timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1',
78+
onlyvars=['timeSeriesStatsMonthly_avg_iceAreaCell_1',
79+
'timeSeriesStatsMonthly_avg_iceVolumeCell_1']))
8080
ds = remove_repeated_time_index(ds)
8181

8282
ds = ds.merge(dsmesh)
@@ -88,8 +88,8 @@ def seaice_timeseries(config):
8888

8989
if ref_casename_v0 != "None":
9090
infiles_v0data = "".join([indir_v0data,'/icevol.',ref_casename_v0,'.year*.nc'])
91-
ds_v0 = xr.open_mfdataset(infiles_v0data,
92-
preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
91+
ds_v0 = xr.open_mfdataset(infiles_v0data, preprocess=lambda x:
92+
preprocess_mpas(x, yearoffset=yr_offset))
9393
ds_v0_tslice = ds_v0.sel(Time=slice(time_start,time_end))
9494

9595
# Make Northern and Southern Hemisphere partition:
@@ -165,31 +165,31 @@ def seaice_timeseries(config):
165165
if varname == "iceAreaCell":
166166

167167
if compare_with_obs:
168-
ds_obs = xr.open_mfdataset(obs_filenameNH,
169-
preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
168+
ds_obs = xr.open_mfdataset(obs_filenameNH, preprocess=lambda x:
169+
preprocess_mpas(x, yearoffset=yr_offset))
170170
ds_obs = remove_repeated_time_index(ds_obs)
171171
var_nh_obs = ds_obs.IceArea
172172
var_nh_obs = replicate_cycle(var_nh,var_nh_obs)
173173

174-
ds_obs = xr.open_mfdataset(obs_filenameSH,
175-
preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
174+
ds_obs = xr.open_mfdataset(obs_filenameSH, preprocess=lambda x:
175+
preprocess_mpas(x, yearoffset=yr_offset))
176176
ds_obs = remove_repeated_time_index(ds_obs)
177177
var_sh_obs = ds_obs.IceArea
178178
var_sh_obs = replicate_cycle(var_sh,var_sh_obs)
179179

180180
if ref_casename_v0 != "None":
181181
infiles_v0data = "".join([indir_v0data,'/icearea.',ref_casename_v0,'.year*.nc'])
182-
ds_v0 = xr.open_mfdataset(infiles_v0data,
183-
preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
182+
ds_v0 = xr.open_mfdataset(infiles_v0data, preprocess=lambda x:
183+
preprocess_mpas(x, yearoffset=yr_offset))
184184
ds_v0_tslice = ds_v0.sel(Time=slice(time_start,time_end))
185185
var_nh_v0 = ds_v0_tslice.icearea_nh
186186
var_sh_v0 = ds_v0_tslice.icearea_sh
187187

188188
elif varname == "iceVolumeCell":
189189

190190
if compare_with_obs:
191-
ds_obs = xr.open_mfdataset(obs_filenameNH,
192-
preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
191+
ds_obs = xr.open_mfdataset(obs_filenameNH, preprocess=lambda x:
192+
preprocess_mpas(x, yearoffset=yr_offset))
193193
ds_obs = remove_repeated_time_index(ds_obs)
194194
var_nh_obs = ds_obs.IceVol
195195
var_nh_obs = replicate_cycle(var_nh,var_nh_obs)
@@ -198,8 +198,8 @@ def seaice_timeseries(config):
198198

199199
if ref_casename_v0 != "None":
200200
infiles_v0data = "".join([indir_v0data,'/icevol.',ref_casename_v0,'.year*.nc'])
201-
ds_v0 = xr.open_mfdataset(infiles_v0data,
202-
preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset))
201+
ds_v0 = xr.open_mfdataset(infiles_v0data, preprocess=lambda x:
202+
preprocess_mpas(x, yearoffset=yr_offset))
203203
ds_v0_tslice = ds_v0.sel(Time=slice(time_start,time_end))
204204
var_nh_v0 = ds_v0_tslice.icevolume_nh
205205
var_sh_v0 = ds_v0_tslice.icevolume_sh

mpas_analysis/shared/mpas_xarray/mpas_xarray.py

Lines changed: 57 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,18 @@
55
Wrapper to handle importing MPAS files into xarray.
66
77
Module:
8-
1. converts MPAS "xtime" to xarray time. Time dimension is assigned via
9-
`preprocess_mpas(..., timeSeriestats=False, ...)`.
10-
2. converts MPAS "timeSinceStartOfSim" to xarray time for MPAS fields coming
11-
from the timeSeriesStatsAM. Time dimension is assigned via
12-
`preprocess_mpas(..., timeSeriesStats=True, ...)`.
13-
3. provides capability to remove redundant time entries from reading of
8+
1. converts MPAS time in various formats to xarray time. The MPAS time
9+
variable is provided via
10+
`preprocess_mpas(..., timestr='xtime', ...)`.
11+
`timestr` can either be a single variable name or a pair of variable
12+
names. In the latter case, each time variable is converted to an
13+
xarray time and the mean of the two times is used as the final xarray
14+
time. Each variable name in `timestr` can refer either to a float
15+
array containing the the number of days since the start of the
16+
simulation (e.g. `daysSinceStartOfSim`) or a string variable with the
17+
date and time (e.g. `xtime`) in the usual MPAS format:
18+
YYYY-MM-DD_hh:mm:ss
19+
2. provides capability to remove redundant time entries from reading of
1420
multiple netCDF datasets via `remove_repeated_time_index`.
1521
1622
Example Usage:
@@ -21,7 +27,7 @@
2127
>>> ds = remove_repeated_time_index(ds)
2228
2329
Phillip J. Wolfram, Xylar Asay-Davis
24-
Last modified: 11/25/2016
30+
Last modified: 12/01/2016
2531
"""
2632

2733
import datetime
@@ -110,26 +116,44 @@ def ensure_list(alist): # {{{
110116
return alist # }}}
111117

112118

113-
def time_series_stat_time(timestr, daysSinceStart): # {{{
119+
def get_datetimes(time_var, yearoffset): # {{{
114120
"""
115-
Modifies daysSinceStart for uniformity based on between differences
116-
between MPAS-O and MPAS-Seaice.
121+
time_var is an xarray DataArray that contains time information as a date
122+
string, a floating-point number of days or a number of days represented
123+
as a pandas timedelta (in ns). The result is a list of datetimes
124+
corresponding to the input dates offset as appropriate by the yearoffset.
117125
118-
Phillip J. Wolfram
119-
09/09/2016
126+
Xylar Asay-Davis
127+
Last modified: 11/25/2016
120128
"""
121129

122-
if (timestr == 'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'):
123-
return [datetime.timedelta(x) for x in daysSinceStart.values]
130+
if time_var.dtype == '|S64':
131+
# this is a variable like date strings like 'xtime'
132+
time = [''.join(atime).strip() for atime in time_var.values]
133+
datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]),
134+
int(x[8:10]), int(x[11:13]),
135+
int(x[14:16]), int(x[17:19]))
136+
for x in time]
137+
elif time_var.dtype == 'float64':
138+
# this array contains floating-point days like 'daysSinceStartOfSim'
139+
start = datetime.datetime(year=yearoffset+1, month=1, day=1)
140+
datetimes = [start + datetime.timedelta(x)
141+
for x in time_var.values]
142+
elif time_var.dtype == 'timedelta64[ns]':
143+
# this array contains a variable like 'daysSinceStartOfSim' as a
144+
# timedelta64
145+
start = datetime.datetime(year=yearoffset+1, month=1, day=1)
146+
datetimes = [start + x for x in
147+
pd.to_timedelta(time_var.values, unit='ns')]
124148
else:
125-
return pd.to_timedelta(daysSinceStart.values, unit='ns')
149+
raise TypeError("time_var of unsupported type {}".format(
150+
time_var.dtype))
126151

127-
# }}}
152+
return datetimes # }}}
128153

129154

130155
def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
131-
timeSeriesStats=False, timestr=None,
132-
yearoffset=1849): # {{{
156+
timestr='xtime', yearoffset=1849): # {{{
133157
"""
134158
Builds correct time specification for MPAS, allowing a date offset because
135159
the time must be between 1678 and 2262 based on the xarray library.
@@ -141,50 +165,36 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
141165
conditions. Hence, a default date offset is chosen to be yearoffset=1849,
142166
(year 0001 of an 1850 run will correspond with Jan 1st, 1850).
143167
144-
Note, for use with the timeSeriesStats analysis member fields set
145-
timeSeriesStats=True and assign timestr.
146-
147-
The timestr variable designates the appropriate variable to be used as the
148-
unlimited dimension for xarray concatenation. For MPAS-O
149-
timestr='time_avg_daysSinceStartOfSim' and for MPAS-Seaice
150-
timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'.
168+
The data set is assumed to have an array of date strings with variable
169+
name given by timestr, typically either 'xtime', or 'xtime_start'.
151170
152171
The onlyvars option reduces the dataset to only include variables in the
153172
onlyvars list. If onlyvars=None, include all dataset variables.
154173
155174
iselvals and selvals provide index and value-based slicing operations for
156175
individual datasets prior to their merge via xarray.
157-
iselvals is a dictionary, e.g., iselvals = {'nVertLevels': slice(0,3),
158-
'nCells': cellIDs}
159-
selvals is a dictionary, e.g., selvals = {'cellLon': 180.0}
176+
iselvals is a dictionary, e.g. iselvals = {'nVertLevels': slice(0, 3),
177+
'nCells': cellIDs}
178+
selvals is a dictionary, e.g. selvals = {'cellLon': 180.0}
160179
161180
Phillip J. Wolfram, Milena Veneziani, Luke van Roekel and Xylar Asay-Davis
162-
11/25/2016
181+
Last modified: 11/25/2016
163182
"""
164183

165-
# ensure timestr is specified used when timeSeriesStats=True
166-
if timeSeriesStats:
167-
if timestr is None:
168-
assert False, 'A value for timestr is required, e.g., ' + \
169-
'for MPAS-O: time_avg_daysSinceStartOfSim, and ' + \
170-
'for MPAS-Seaice: ' + \
171-
'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'
172-
173-
# compute shifted datetimes
174-
daysSinceStart = ds[timestr]
175-
datetimes = [datetime.datetime(yearoffset+1, 1, 1) + x
176-
for x in time_series_stat_time(timestr, daysSinceStart)]
184+
if isinstance(timestr, (tuple, list)):
185+
# we want to average the two
186+
assert(len(timestr) == 2)
187+
starts = get_datetimes(ds[timestr[0]], yearoffset)
188+
ends = get_datetimes(ds[timestr[1]], yearoffset)
189+
datetimes = [starts[i] + (ends[i] - starts[i])/2
190+
for i in range(len(starts))]
177191
else:
178-
time = np.array([''.join(atime).strip() for atime in ds.xtime.values])
179-
datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]),
180-
int(x[8:10]), int(x[11:13]),
181-
int(x[14:16]), int(x[17:19]))
182-
for x in time]
192+
datetimes = get_datetimes(ds[timestr], yearoffset)
183193

184194
assert_valid_datetimes(datetimes, yearoffset)
185195

186196
# append the corret time information
187-
ds.coords['Time'] = pd.to_datetime(datetimes)
197+
ds.coords['Time'] = datetimes
188198
# record the yroffset
189199
ds.attrs.__setitem__('time_yearoffset', str(yearoffset))
190200

0 commit comments

Comments
 (0)