diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 65f34b847f8d0..00964fce49358 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -403,7 +403,7 @@ def _addsub_int_array(self, other, op): td = Timedelta(self.freq) return op(self, td * other) - # We should only get here with DatetimeIndex; dispatch + # We should only get here with Datetime Array/Index; dispatch # to _addsub_offset_array assert not is_timedelta64_dtype(self) return op(self, np.array(other) * self.freq) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 05bc3d23cfb8e..402c635a5a046 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from datetime import timedelta +from datetime import datetime, timedelta import warnings import numpy as np @@ -22,6 +22,8 @@ _ensure_int64) from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.algorithms import checked_add_with_arr + from pandas.tseries.frequencies import to_offset, DateOffset from pandas.tseries.offsets import Tick @@ -250,8 +252,41 @@ def _assert_tzawareness_compat(self, other): # ----------------------------------------------------------------- # Arithmetic Methods + def _sub_datelike(self, other): + # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] + if isinstance(other, (DatetimeArrayMixin, np.ndarray)): + if isinstance(other, np.ndarray): + # if other is an ndarray, we assume it is datetime64-dtype + other = type(self)(other) + # require tz compat + if not self._has_same_tz(other): + raise TypeError("{cls} subtraction must have the same " + "timezones or no timezones" + .format(cls=type(self).__name__)) + result = self._sub_datelike_dti(other) + elif isinstance(other, (datetime, np.datetime64)): + assert other is not NaT + other = Timestamp(other) + if other is NaT: + return self - NaT + elif not self._has_same_tz(other): + # require tz compat + raise TypeError("Timestamp subtraction must have the same " + "timezones or no timezones") + else: + i8 = self.asi8 + result = checked_add_with_arr(i8, -other.value, + arr_mask=self._isnan) + result = self._maybe_mask_results(result, + fill_value=iNaT) + else: + raise TypeError("cannot subtract {cls} and {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) + return result.view('timedelta64[ns]') + def _sub_datelike_dti(self, other): - """subtraction of two DatetimeIndexes""" + """subtraction of two Datetime Arrays/Indexes""" if not len(self) == len(other): raise ValueError("cannot add indices of unequal length") @@ -517,6 +552,48 @@ def to_pydatetime(self): """ return tslib.ints_to_pydatetime(self.asi8, tz=self.tz) + def normalize(self): + """ + Convert times to midnight. + + The time component of the date-time is converted to midnight i.e. + 00:00:00. This is useful in cases, when the time does not matter. + Length is unaltered. The timezones are unaffected. + + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + Returns + ------- + DatetimeArray, DatetimeIndex or Series + The same type as the original data. Series will have the same + name and index. DatetimeIndex will have the same name. + + See Also + -------- + floor : Floor the datetimes to the specified freq. + ceil : Ceil the datetimes to the specified freq. + round : Round the datetimes to the specified freq. + + Examples + -------- + >>> idx = pd.DatetimeIndex(start='2014-08-01 10:00', freq='H', + ... periods=3, tz='Asia/Calcutta') + >>> idx + DatetimeIndex(['2014-08-01 10:00:00+05:30', + '2014-08-01 11:00:00+05:30', + '2014-08-01 12:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq='H') + >>> idx.normalize() + DatetimeIndex(['2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq=None) + """ + new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) + return type(self)(new_values, + freq='infer').tz_localize(self.tz) + # ----------------------------------------------------------------- # Properties - Vectorized Timestamp Properties/Methods diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 35baa3262d3dd..2bab49913d2db 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -5,20 +5,25 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, get_period_field_arr, period_asfreq_arr) -from pandas._libs.tslibs import period as libperiod -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +from pandas._libs.tslibs import ( + NaT, iNaT, + delta_to_nanoseconds, + period as libperiod) from pandas._libs.tslibs.fields import isleapyear_arr from pandas import compat +from pandas.compat import zip from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype) from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.generic import ABCSeries + +import pandas.core.common as com from pandas.tseries import frequencies from pandas.tseries.offsets import Tick, DateOffset @@ -157,6 +162,25 @@ def _from_ordinals(cls, values, freq=None): result._freq = Period._maybe_convert_freq(freq) return result + @classmethod + def _generate_range(cls, start, end, periods, freq, fields): + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + field_count = len(fields) + if com._count_not_none(start, end) > 0: + if field_count > 0: + raise ValueError('Can either instantiate from fields ' + 'or endpoints, but not both') + subarr, freq = _get_ordinal_range(start, end, periods, freq) + elif field_count > 0: + subarr, freq = _range_from_fields(freq=freq, **fields) + else: + raise ValueError('Not enough parameters to construct ' + 'Period range') + + return subarr, freq + # -------------------------------------------------------------------- # Vectorized analogues of Period properties @@ -371,3 +395,102 @@ def _add_comparison_methods(cls): PeriodArrayMixin._add_comparison_methods() + + +# ----------------------------------------------------------------- +# Constructor Helpers + +def _get_ordinal_range(start, end, periods, freq, mult=1): + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + if freq is not None: + _, mult = frequencies.get_freq_code(freq) + + if start is not None: + start = Period(start, freq) + if end is not None: + end = Period(end, freq) + + is_start_per = isinstance(start, Period) + is_end_per = isinstance(end, Period) + + if is_start_per and is_end_per and start.freq != end.freq: + raise ValueError('start and end must have same freq') + if (start is NaT or end is NaT): + raise ValueError('start and end must not be NaT') + + if freq is None: + if is_start_per: + freq = start.freq + elif is_end_per: + freq = end.freq + else: # pragma: no cover + raise ValueError('Could not infer freq from start/end') + + if periods is not None: + periods = periods * mult + if start is None: + data = np.arange(end.ordinal - periods + mult, + end.ordinal + 1, mult, + dtype=np.int64) + else: + data = np.arange(start.ordinal, start.ordinal + periods, mult, + dtype=np.int64) + else: + data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) + + return data, freq + + +def _range_from_fields(year=None, month=None, quarter=None, day=None, + hour=None, minute=None, second=None, freq=None): + if hour is None: + hour = 0 + if minute is None: + minute = 0 + if second is None: + second = 0 + if day is None: + day = 1 + + ordinals = [] + + if quarter is not None: + if freq is None: + freq = 'Q' + base = frequencies.FreqGroup.FR_QTR + else: + base, mult = frequencies.get_freq_code(freq) + if base != frequencies.FreqGroup.FR_QTR: + raise AssertionError("base must equal FR_QTR") + + year, quarter = _make_field_arrays(year, quarter) + for y, q in zip(year, quarter): + y, m = libperiod._quarter_to_myear(y, q, freq) + val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) + ordinals.append(val) + else: + base, mult = frequencies.get_freq_code(freq) + arrays = _make_field_arrays(year, month, day, hour, minute, second) + for y, mth, d, h, mn, s in zip(*arrays): + ordinals.append(libperiod.period_ordinal( + y, mth, d, h, mn, s, 0, 0, base)) + + return np.array(ordinals, dtype=np.int64), freq + + +def _make_field_arrays(*fields): + length = None + for x in fields: + if isinstance(x, (list, np.ndarray, ABCSeries)): + if length is not None and len(x) != length: + raise ValueError('Mismatched Period array lengths') + elif length is None: + length = len(x) + + arrays = [np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries)) + else np.repeat(x, length) for x in fields] + + return arrays diff --git a/pandas/core/arrays/timedelta.py b/pandas/core/arrays/timedelta.py index f093cadec5a38..58830253526aa 100644 --- a/pandas/core/arrays/timedelta.py +++ b/pandas/core/arrays/timedelta.py @@ -4,7 +4,7 @@ import numpy as np from pandas._libs import tslibs -from pandas._libs.tslibs import Timedelta, NaT +from pandas._libs.tslibs import Timestamp, Timedelta, NaT, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 @@ -15,6 +15,8 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna +from pandas.core.algorithms import checked_add_with_arr + from pandas.tseries.offsets import Tick, DateOffset from pandas.tseries.frequencies import to_offset @@ -92,6 +94,24 @@ def _add_offset(self, other): .format(typ=type(other).__name__, cls=type(self).__name__)) + def _add_datelike(self, other): + # adding a timedeltaindex to a datetimelike + from .datetimes import DatetimeArrayMixin + if isinstance(other, (DatetimeArrayMixin, np.ndarray)): + # if other is an ndarray, we assume it is datetime64-dtype + # defer to implementation in DatetimeIndex + if isinstance(other, np.ndarray): + other = DatetimeArrayMixin(other) + return other + self + else: + assert other is not NaT + other = Timestamp(other) + i8 = self.asi8 + result = checked_add_with_arr(i8, other.value, + arr_mask=self._isnan) + result = self._maybe_mask_results(result, fill_value=iNaT) + return DatetimeArrayMixin(result) + def _sub_datelike(self, other): assert other is not NaT raise TypeError("cannot subtract a datelike from a {cls}" @@ -198,3 +218,33 @@ def to_pytimedelta(self): nanoseconds = _field_accessor("nanoseconds", "nanoseconds", "\nNumber of nanoseconds (>= 0 and less " "than 1 microsecond) for each\nelement.\n") + + @property + def components(self): + """ + Return a dataframe of the components (days, hours, minutes, + seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. + + Returns + ------- + a DataFrame + """ + from pandas import DataFrame + + columns = ['days', 'hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds', 'nanoseconds'] + hasnans = self.hasnans + if hasnans: + def f(x): + if isna(x): + return [np.nan] * len(columns) + return x.components + else: + def f(x): + return x.components + + result = DataFrame([f(x) for x in self]) + result.columns = columns + if not hasnans: + result = result.astype('int64') + return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 96c30eeb92628..62a1a3ae59ebc 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -32,7 +32,6 @@ from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat -from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays.datetimes import DatetimeArrayMixin from pandas.core.indexes.base import Index, _index_shared_docs @@ -782,38 +781,6 @@ def __setstate__(self, state): raise Exception("invalid pickle state") _unpickle_compat = __setstate__ - def _sub_datelike(self, other): - # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] - if isinstance(other, (DatetimeIndex, np.ndarray)): - # if other is an ndarray, we assume it is datetime64-dtype - other = DatetimeIndex(other) - # require tz compat - if not self._has_same_tz(other): - raise TypeError("{cls} subtraction must have the same " - "timezones or no timezones" - .format(cls=type(self).__name__)) - result = self._sub_datelike_dti(other) - elif isinstance(other, (datetime, np.datetime64)): - assert other is not tslibs.NaT - other = Timestamp(other) - if other is tslibs.NaT: - return self - tslibs.NaT - # require tz compat - elif not self._has_same_tz(other): - raise TypeError("Timestamp subtraction must have the same " - "timezones or no timezones") - else: - i8 = self.asi8 - result = checked_add_with_arr(i8, -other.value, - arr_mask=self._isnan) - result = self._maybe_mask_results(result, - fill_value=tslibs.iNaT) - else: - raise TypeError("cannot subtract {cls} and {typ}" - .format(cls=type(self).__name__, - typ=type(other).__name__)) - return result.view('timedelta64[ns]') - def _maybe_update_attributes(self, attrs): """ Update Index attributes (e.g. freq) depending on op """ freq = attrs.get('freq', None) @@ -1581,48 +1548,11 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): is_year_end = _wrap_field_accessor('is_year_end') is_leap_year = _wrap_field_accessor('is_leap_year') + @Appender(DatetimeArrayMixin.normalize.__doc__) def normalize(self): - """ - Convert times to midnight. - - The time component of the date-time is converted to midnight i.e. - 00:00:00. This is useful in cases, when the time does not matter. - Length is unaltered. The timezones are unaffected. - - This method is available on Series with datetime values under - the ``.dt`` accessor, and directly on DatetimeIndex. - - Returns - ------- - DatetimeIndex or Series - The same type as the original data. Series will have the same - name and index. DatetimeIndex will have the same name. - - See Also - -------- - floor : Floor the datetimes to the specified freq. - ceil : Ceil the datetimes to the specified freq. - round : Round the datetimes to the specified freq. - - Examples - -------- - >>> idx = pd.DatetimeIndex(start='2014-08-01 10:00', freq='H', - ... periods=3, tz='Asia/Calcutta') - >>> idx - DatetimeIndex(['2014-08-01 10:00:00+05:30', - '2014-08-01 11:00:00+05:30', - '2014-08-01 12:00:00+05:30'], - dtype='datetime64[ns, Asia/Calcutta]', freq='H') - >>> idx.normalize() - DatetimeIndex(['2014-08-01 00:00:00+05:30', - '2014-08-01 00:00:00+05:30', - '2014-08-01 00:00:00+05:30'], - dtype='datetime64[ns, Asia/Calcutta]', freq=None) - """ - new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) - return DatetimeIndex(new_values, - freq='infer', - name=self.name).tz_localize(self.tz) + res = DatetimeArrayMixin.normalize(self) + res.name = self.name + return res @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a531a57eb031f..a8e0c7f1aaa6a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -16,7 +16,6 @@ is_bool_dtype, pandas_dtype, _ensure_object) -from pandas.core.dtypes.generic import ABCSeries import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc @@ -29,7 +28,7 @@ from pandas._libs import tslib, index as libindex from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, - _validate_end_alias, _quarter_to_myear) + _validate_end_alias) from pandas._libs.tslibs import resolution, period from pandas.core.arrays.period import PeriodArrayMixin @@ -39,7 +38,6 @@ from pandas import compat from pandas.util._decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) -from pandas.compat import zip import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -266,25 +264,6 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, def _engine(self): return self._engine_type(lambda: self, len(self)) - @classmethod - def _generate_range(cls, start, end, periods, freq, fields): - if freq is not None: - freq = Period._maybe_convert_freq(freq) - - field_count = len(fields) - if com._count_not_none(start, end) > 0: - if field_count > 0: - raise ValueError('Can either instantiate from fields ' - 'or endpoints, but not both') - subarr, freq = _get_ordinal_range(start, end, periods, freq) - elif field_count > 0: - subarr, freq = _range_from_fields(freq=freq, **fields) - else: - raise ValueError('Not enough parameters to construct ' - 'Period range') - - return subarr, freq - @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): """ @@ -877,102 +856,6 @@ def tz_localize(self, tz, ambiguous='raise'): PeriodIndex._add_datetimelike_methods() -def _get_ordinal_range(start, end, periods, freq, mult=1): - if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') - - if freq is not None: - _, mult = _gfc(freq) - - if start is not None: - start = Period(start, freq) - if end is not None: - end = Period(end, freq) - - is_start_per = isinstance(start, Period) - is_end_per = isinstance(end, Period) - - if is_start_per and is_end_per and start.freq != end.freq: - raise ValueError('start and end must have same freq') - if (start is tslib.NaT or end is tslib.NaT): - raise ValueError('start and end must not be NaT') - - if freq is None: - if is_start_per: - freq = start.freq - elif is_end_per: - freq = end.freq - else: # pragma: no cover - raise ValueError('Could not infer freq from start/end') - - if periods is not None: - periods = periods * mult - if start is None: - data = np.arange(end.ordinal - periods + mult, - end.ordinal + 1, mult, - dtype=np.int64) - else: - data = np.arange(start.ordinal, start.ordinal + periods, mult, - dtype=np.int64) - else: - data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) - - return data, freq - - -def _range_from_fields(year=None, month=None, quarter=None, day=None, - hour=None, minute=None, second=None, freq=None): - if hour is None: - hour = 0 - if minute is None: - minute = 0 - if second is None: - second = 0 - if day is None: - day = 1 - - ordinals = [] - - if quarter is not None: - if freq is None: - freq = 'Q' - base = frequencies.FreqGroup.FR_QTR - else: - base, mult = _gfc(freq) - if base != frequencies.FreqGroup.FR_QTR: - raise AssertionError("base must equal FR_QTR") - - year, quarter = _make_field_arrays(year, quarter) - for y, q in zip(year, quarter): - y, m = _quarter_to_myear(y, q, freq) - val = period.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) - ordinals.append(val) - else: - base, mult = _gfc(freq) - arrays = _make_field_arrays(year, month, day, hour, minute, second) - for y, mth, d, h, mn, s in zip(*arrays): - ordinals.append(period.period_ordinal( - y, mth, d, h, mn, s, 0, 0, base)) - - return np.array(ordinals, dtype=np.int64), freq - - -def _make_field_arrays(*fields): - length = None - for x in fields: - if isinstance(x, (list, np.ndarray, ABCSeries)): - if length is not None and len(x) != length: - raise ValueError('Mismatched Period array lengths') - elif length is None: - length = len(x) - - arrays = [np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries)) - else np.repeat(x, length) for x in fields] - - return arrays - - def pnow(freq=None): # deprecation, xref #13790 warnings.warn("pd.pnow() and pandas.core.indexes.period.pnow() " diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 3af825455caac..2708151d2c2bb 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -23,7 +23,6 @@ import pandas.compat as compat from pandas.tseries.frequencies import to_offset -from pandas.core.algorithms import checked_add_with_arr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com @@ -35,7 +34,7 @@ to_timedelta, _coerce_scalar_to_timedelta_type) from pandas.tseries.offsets import Tick, DateOffset from pandas._libs import (lib, index as libindex, - join as libjoin, Timedelta, NaT, iNaT) + join as libjoin, Timedelta, NaT) def _wrap_field_accessor(name): @@ -344,23 +343,6 @@ def _evaluate_with_timedelta_like(self, other, op): return NotImplemented return Index(result, name=self.name, copy=False) - def _add_datelike(self, other): - # adding a timedeltaindex to a datetimelike - from pandas import Timestamp, DatetimeIndex - if isinstance(other, (DatetimeIndex, np.ndarray)): - # if other is an ndarray, we assume it is datetime64-dtype - # defer to implementation in DatetimeIndex - other = DatetimeIndex(other) - return other + self - else: - assert other is not NaT - other = Timestamp(other) - i8 = self.asi8 - result = checked_add_with_arr(i8, other.value, - arr_mask=self._isnan) - result = self._maybe_mask_results(result, fill_value=iNaT) - return DatetimeIndex(result) - def _addsub_offset_array(self, other, op): # Add or subtract Array-like of DateOffset objects try: @@ -383,36 +365,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): microseconds = _wrap_field_accessor("microseconds") nanoseconds = _wrap_field_accessor("nanoseconds") - @property - def components(self): - """ - Return a dataframe of the components (days, hours, minutes, - seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. - - Returns - ------- - a DataFrame - """ - from pandas import DataFrame - - columns = ['days', 'hours', 'minutes', 'seconds', - 'milliseconds', 'microseconds', 'nanoseconds'] - hasnans = self.hasnans - if hasnans: - def f(x): - if isna(x): - return [np.nan] * len(columns) - return x.components - else: - def f(x): - return x.components - - result = DataFrame([f(x) for x in self]) - result.columns = columns - if not hasnans: - result = result.astype('int64') - return result - def total_seconds(self): """ Return total duration of each element expressed in seconds.