Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ Enhancements
:py:meth:`~xarray.DataArray.interp`, and
:py:meth:`~xarray.Dataset.interp`.
By `Spencer Clark <https://github.com/spencerkclark>`_
- Added support for ``fill_value`` with
:py:meth:`~xarray.DataArray.shift` and :py:meth:`~xarray.Dataset.shift`
By `Maximilian Roos <https://github.com/max-sixty>`_


Bug fixes
~~~~~~~~~
Expand Down
8 changes: 5 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2080,7 +2080,7 @@ def diff(self, dim, n=1, label='upper'):
ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label)
return self._from_temp_dataset(ds)

def shift(self, shifts=None, **shifts_kwargs):
def shift(self, shifts=None, fill_value=None, **shifts_kwargs):
"""Shift this array by an offset along one or more dimensions.

Only the data is moved; coordinates stay in place. Values shifted from
Expand All @@ -2093,6 +2093,8 @@ def shift(self, shifts=None, **shifts_kwargs):
Integer offset to shift along each of the given dimensions.
Positive offsets shift to the right; negative offsets shift to the
left.
fill_value: scalar, optional
Value to use for newly missing values
**shifts_kwargs:
The keyword arguments form of ``shifts``.
One of shifts or shifts_kwarg must be provided.
Expand All @@ -2117,8 +2119,8 @@ def shift(self, shifts=None, **shifts_kwargs):
Coordinates:
* x (x) int64 0 1 2
"""
ds = self._to_temp_dataset().shift(shifts=shifts, **shifts_kwargs)
return self._from_temp_dataset(ds)
return self._replace(variable=self.variable.shift(
shifts=shifts, fill_value=fill_value, **shifts_kwargs))
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a reasonable change for DataArray operations that only change their data?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is 100% equivalent -- I don't really care either way. But perhaps splitting this into two statements would be clearer.


def roll(self, shifts=None, roll_coords=None, **shifts_kwargs):
"""Roll this array by an offset along one or more dimensions.
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3481,7 +3481,7 @@ def diff(self, dim, n=1, label='upper'):
else:
return difference

def shift(self, shifts=None, **shifts_kwargs):
def shift(self, shifts=None, fill_value=None, **shifts_kwargs):
"""Shift this dataset by an offset along one or more dimensions.

Only data variables are moved; coordinates stay in place. This is
Expand All @@ -3493,6 +3493,8 @@ def shift(self, shifts=None, **shifts_kwargs):
Integer offset to shift along each of the given dimensions.
Positive offsets shift to the right; negative offsets shift to the
left.
fill_value: scalar, optional
Value to use for newly missing values
**shifts_kwargs:
The keyword arguments form of ``shifts``.
One of shifts or shifts_kwarg must be provided.
Expand Down Expand Up @@ -3529,7 +3531,8 @@ def shift(self, shifts=None, **shifts_kwargs):
if name in self.data_vars:
var_shifts = dict((k, v) for k, v in shifts.items()
if k in var.dims)
variables[name] = var.shift(**var_shifts)
variables[name] = var.shift(
fill_value=fill_value, **var_shifts)
else:
variables[name] = var

Expand Down
20 changes: 13 additions & 7 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ def squeeze(self, dim=None):
dims = common.get_squeeze_dims(self, dim)
return self.isel({d: 0 for d in dims})

def _shift_one_dim(self, dim, count):
def _shift_one_dim(self, dim, count, fill_value=None):
axis = self.get_axis_num(dim)

if count > 0:
Expand All @@ -940,7 +940,11 @@ def _shift_one_dim(self, dim, count):
keep = slice(None)

trimmed_data = self[(slice(None),) * axis + (keep,)].data
dtype, fill_value = dtypes.maybe_promote(self.dtype)

if fill_value is None or fill_value is np.nan:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dtypes.NA as the default value for fill_value, and then copy these lines from pad to ensure that this works for arbitrary fill_values such as None:
https://github.com/pydata/xarray/blob/master/xarray/core/variable.py#L1012-L1015

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I saw that, but it means that if someone passes np.nan (the value, not the dtypes.NA default) to a int dtype, I get -9223372036854775808. Should we raise in that case? Or I'll see what I can do to coerce.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. I'm pretty sure this would qualify as a NumPy bug :(.

I'm mostly concerned with duplicate logic that works slightly differently, so if you want to try doing this differently I'm open to it. It might actually make sense to replace most of this method to a call to Variable.pad_with_fill_value().

Copy link
Collaborator Author

@max-sixty max-sixty Oct 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm mostly concerned with duplicate logic that works slightly differently

💯

Great, I'll have a look and report back

dtype, fill_value = dtypes.maybe_promote(self.dtype)
else:
dtype = self.dtype
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if filler is not compatible with self.dtype?
For example, feeding np.nan to an int array.
Probably it is a part of user responsibility and we do not need to take care of this, but I am just curious of it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory, NumPy should raise an error... But it may not.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(this is the issue I'm looking at ref #2470 (comment)), good foresight @fujiisoup !


shape = list(self.shape)
shape[axis] = min(abs(count), shape[axis])
Expand All @@ -952,12 +956,12 @@ def _shift_one_dim(self, dim, count):
else:
full = np.full

nans = full(shape, fill_value, dtype=dtype)
filler = full(shape, fill_value, dtype=dtype)

if count > 0:
arrays = [nans, trimmed_data]
arrays = [filler, trimmed_data]
else:
arrays = [trimmed_data, nans]
arrays = [trimmed_data, filler]

data = duck_array_ops.concatenate(arrays, axis)

Expand All @@ -969,7 +973,7 @@ def _shift_one_dim(self, dim, count):

return type(self)(self.dims, data, self._attrs, fastpath=True)

def shift(self, shifts=None, **shifts_kwargs):
def shift(self, shifts=None, fill_value=None, **shifts_kwargs):
"""
Return a new Variable with shifted data.

Expand All @@ -979,6 +983,8 @@ def shift(self, shifts=None, **shifts_kwargs):
Integer offset to shift along each of the given dimensions.
Positive offsets shift to the right; negative offsets shift to the
left.
fill_value: scalar, optional
Value to use for newly missing values
**shifts_kwargs:
The keyword arguments form of ``shifts``.
One of shifts or shifts_kwarg must be provided.
Expand All @@ -991,7 +997,7 @@ def shift(self, shifts=None, **shifts_kwargs):
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, 'shift')
result = self
for dim, count in shifts.items():
result = result._shift_one_dim(dim, count)
result = result._shift_one_dim(dim, count, fill_value=fill_value)
return result

def pad_with_fill_value(self, pad_widths=None, fill_value=dtypes.NA,
Expand Down
10 changes: 6 additions & 4 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3129,12 +3129,14 @@ def test_coordinate_diff(self):
actual = lon.diff('lon')
assert_equal(expected, actual)

@pytest.mark.parametrize('offset', [-5, -2, -1, 0, 1, 2, 5])
def test_shift(self, offset):
@pytest.mark.parametrize('offset', [-5, 0, 1, 2])
@pytest.mark.parametrize('fill_value, dtype', [(2, int), (np.nan, float)])
def test_shift(self, offset, fill_value, dtype):
arr = DataArray([1, 2, 3], dims='x')
actual = arr.shift(x=1)
expected = DataArray([np.nan, 1, 2], dims='x')
actual = arr.shift(x=1, fill_value=fill_value)
expected = DataArray([fill_value, 1, 2], dims='x')
assert_identical(expected, actual)
assert actual.dtype == dtype

arr = DataArray([1, 2, 3], [('x', ['a', 'b', 'c'])])
expected = DataArray(arr.to_pandas().shift(offset))
Expand Down
7 changes: 4 additions & 3 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3896,12 +3896,13 @@ def test_dataset_diff_exception_label_str(self):
with raises_regex(ValueError, '\'label\' argument has to'):
ds.diff('dim2', label='raise_me')

def test_shift(self):
@pytest.mark.parametrize('fill_value', [np.nan, 2, 2.0])
def test_shift(self, fill_value):
coords = {'bar': ('x', list('abc')), 'x': [-4, 3, 2]}
attrs = {'meta': 'data'}
ds = Dataset({'foo': ('x', [1, 2, 3])}, coords, attrs)
actual = ds.shift(x=1)
expected = Dataset({'foo': ('x', [np.nan, 1, 2])}, coords, attrs)
actual = ds.shift(x=1, fill_value=fill_value)
expected = Dataset({'foo': ('x', [fill_value, 1, 2])}, coords, attrs)
assert_identical(expected, actual)

with raises_regex(ValueError, 'dimensions'):
Expand Down
21 changes: 11 additions & 10 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,33 +1165,34 @@ def test_indexing_0d_unicode(self):
expected = Variable((), u'tmax')
assert_identical(actual, expected)

def test_shift(self):
@pytest.mark.parametrize('fill_value', [np.nan, 2, 2.0])
def test_shift(self, fill_value):
v = Variable('x', [1, 2, 3, 4, 5])

assert_identical(v, v.shift(x=0))
assert v is not v.shift(x=0)

expected = Variable('x', [np.nan, 1, 2, 3, 4])
assert_identical(expected, v.shift(x=1))
expected = Variable('x', [fill_value, 1, 2, 3, 4])
assert_identical(expected, v.shift(x=1, fill_value=fill_value))

expected = Variable('x', [np.nan, np.nan, 1, 2, 3])
assert_identical(expected, v.shift(x=2))

expected = Variable('x', [2, 3, 4, 5, np.nan])
assert_identical(expected, v.shift(x=-1))
expected = Variable('x', [2, 3, 4, 5, fill_value])
assert_identical(expected, v.shift(x=-1, fill_value=fill_value))

expected = Variable('x', [np.nan] * 5)
assert_identical(expected, v.shift(x=5))
assert_identical(expected, v.shift(x=6))
expected = Variable('x', [fill_value] * 5)
assert_identical(expected, v.shift(x=5, fill_value=fill_value))
assert_identical(expected, v.shift(x=6, fill_value=fill_value))

with raises_regex(ValueError, 'dimension'):
v.shift(z=0)

v = Variable('x', [1, 2, 3, 4, 5], {'foo': 'bar'})
assert_identical(v, v.shift(x=0))

expected = Variable('x', [np.nan, 1, 2, 3, 4], {'foo': 'bar'})
assert_identical(expected, v.shift(x=1))
expected = Variable('x', [fill_value, 1, 2, 3, 4], {'foo': 'bar'})
assert_identical(expected, v.shift(x=1, fill_value=fill_value))

def test_shift2d(self):
v = Variable(('x', 'y'), [[1, 2], [3, 4]])
Expand Down