From 02aed6cac37a41174a59c326c1c4a1cca95a186e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sat, 22 Feb 2020 14:06:19 +0200 Subject: [PATCH] CLN: Some code cleanups --- pandas/_libs/indexing.pyx | 3 +- pandas/_libs/sparse.pyx | 13 +-- pandas/_libs/tslibs/conversion.pyx | 5 +- pandas/_libs/tslibs/resolution.pyx | 41 +++---- pandas/_libs/tslibs/timedeltas.pyx | 177 ++++++++++++++++------------- pandas/_libs/tslibs/timezones.pyx | 18 +-- pandas/_libs/writers.pyx | 26 +++-- 7 files changed, 154 insertions(+), 129 deletions(-) diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx index cdccdb504571c..316943edee124 100644 --- a/pandas/_libs/indexing.pyx +++ b/pandas/_libs/indexing.pyx @@ -1,7 +1,6 @@ cdef class _NDFrameIndexerBase: """ - A base class for _NDFrameIndexer for fast instantiation and attribute - access. + A base class for _NDFrameIndexer for fast instantiation and attribute access. """ cdef public object obj, name, _ndim diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 3a6dd506b2428..50f220af0f5bc 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -188,8 +188,7 @@ cdef class IntIndex(SparseIndex): return -1 @cython.wraparound(False) - cpdef ndarray[int32_t] lookup_array(self, ndarray[ - int32_t, ndim=1] indexer): + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): """ Vectorized lookup, returns ndarray[int32_t] """ @@ -424,12 +423,9 @@ cdef class BlockIndex(SparseIndex): """ Intersect two BlockIndex objects - Parameters - ---------- - Returns ------- - intersection : BlockIndex + BlockIndex """ cdef: BlockIndex y @@ -518,7 +514,7 @@ cdef class BlockIndex(SparseIndex): Returns ------- - union : BlockIndex + BlockIndex """ return BlockUnion(self, y.to_block_index()).result @@ -548,8 +544,7 @@ cdef class BlockIndex(SparseIndex): return -1 @cython.wraparound(False) - cpdef ndarray[int32_t] lookup_array(self, ndarray[ - int32_t, ndim=1] indexer): + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): """ Vectorized lookup, returns ndarray[int32_t] """ diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 57b4100fbceb0..6e978d495c325 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -84,12 +84,11 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): Parameters ---------- arr : ndarray - copy : boolean, default True + copy : bool, default True Returns ------- - result : ndarray with dtype datetime64[ns] - + ndarray with dtype datetime64[ns] """ cdef: Py_ssize_t i, n = arr.size diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 1e0eb7f97ec54..ecf31c15bb72c 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -110,8 +110,8 @@ def get_freq_group(freq) -> int: """ Return frequency code group of given frequency str or offset. - Example - ------- + Examples + -------- >>> get_freq_group('W-MON') 4000 @@ -193,8 +193,8 @@ class Resolution: """ Return resolution str against resolution code. - Example - ------- + Examples + -------- >>> Resolution.get_str(Resolution.RESO_SEC) 'second' """ @@ -205,8 +205,8 @@ class Resolution: """ Return resolution str against resolution code. - Example - ------- + Examples + -------- >>> Resolution.get_reso('second') 2 @@ -220,8 +220,8 @@ class Resolution: """ Return frequency str against resolution str. - Example - ------- + Examples + -------- >>> f.Resolution.get_freq_group('day') 4000 """ @@ -232,8 +232,8 @@ class Resolution: """ Return frequency str against resolution str. - Example - ------- + Examples + -------- >>> f.Resolution.get_freq('day') 'D' """ @@ -244,8 +244,8 @@ class Resolution: """ Return resolution str against frequency str. - Example - ------- + Examples + -------- >>> Resolution.get_str_from_freq('H') 'hour' """ @@ -256,8 +256,8 @@ class Resolution: """ Return resolution code against frequency str. - Example - ------- + Examples + -------- >>> Resolution.get_reso_from_freq('H') 4 @@ -273,8 +273,8 @@ class Resolution: Parameters ---------- - value : integer or float - freq : string + value : int or float + freq : str Frequency string Raises @@ -282,8 +282,8 @@ class Resolution: ValueError If the float cannot be converted to an integer at any resolution. - Example - ------- + Examples + -------- >>> Resolution.get_stride_from_decimal(1.5, 'T') (90, 'S') @@ -298,8 +298,9 @@ class Resolution: else: start_reso = cls.get_reso_from_freq(freq) if start_reso == 0: - raise ValueError("Could not convert to integer offset " - "at any resolution") + raise ValueError( + "Could not convert to integer offset at any resolution" + ) next_value = cls._reso_mult_map[start_reso] * value next_name = cls._reso_str_bump_map[freq] diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3742506a7f8af..66660c5f641fd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -37,51 +37,61 @@ from pandas._libs.tslibs.offsets import _Tick as Tick # Constants # components named tuple -Components = collections.namedtuple('Components', [ - 'days', 'hours', 'minutes', 'seconds', - 'milliseconds', 'microseconds', 'nanoseconds']) - - -cdef dict timedelta_abbrevs = { 'Y': 'Y', - 'y': 'Y', - 'M': 'M', - 'W': 'W', - 'w': 'W', - 'D': 'D', - 'd': 'D', - 'days': 'D', - 'day': 'D', - 'hours': 'h', - 'hour': 'h', - 'hr': 'h', - 'h': 'h', - 'm': 'm', - 'minute': 'm', - 'min': 'm', - 'minutes': 'm', - 't': 'm', - 's': 's', - 'seconds': 's', - 'sec': 's', - 'second': 's', - 'ms': 'ms', - 'milliseconds': 'ms', - 'millisecond': 'ms', - 'milli': 'ms', - 'millis': 'ms', - 'l': 'ms', - 'us': 'us', - 'microseconds': 'us', - 'microsecond': 'us', - 'micro': 'us', - 'micros': 'us', - 'u': 'us', - 'ns': 'ns', - 'nanoseconds': 'ns', - 'nano': 'ns', - 'nanos': 'ns', - 'nanosecond': 'ns', - 'n': 'ns'} +Components = collections.namedtuple( + "Components", + [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ], +) + +cdef dict timedelta_abbrevs = { + "Y": "Y", + "y": "Y", + "M": "M", + "W": "W", + "w": "W", + "D": "D", + "d": "D", + "days": "D", + "day": "D", + "hours": "h", + "hour": "h", + "hr": "h", + "h": "h", + "m": "m", + "minute": "m", + "min": "m", + "minutes": "m", + "t": "m", + "s": "s", + "seconds": "s", + "sec": "s", + "second": "s", + "ms": "ms", + "milliseconds": "ms", + "millisecond": "ms", + "milli": "ms", + "millis": "ms", + "l": "ms", + "us": "us", + "microseconds": "us", + "microsecond": "us", + "micro": "us", + "micros": "us", + "u": "us", + "ns": "ns", + "nanoseconds": "ns", + "nano": "ns", + "nanos": "ns", + "nanosecond": "ns", + "n": "ns", +} _no_input = object() @@ -137,9 +147,11 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: if is_integer_object(delta): return delta if PyDelta_Check(delta): - return (delta.days * 24 * 60 * 60 * 1000000 + - delta.seconds * 1000000 + - delta.microseconds) * 1000 + return ( + delta.days * 24 * 60 * 60 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) * 1000 raise TypeError(type(delta)) @@ -212,9 +224,8 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): Py_ssize_t i, n int64_t[:] iresult - if errors not in ('ignore', 'raise', 'coerce'): - raise ValueError("errors must be one of 'ignore', " - "'raise', or 'coerce'}") + if errors not in {'ignore', 'raise', 'coerce'}: + raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}") n = values.shape[0] result = np.empty(n, dtype='m8[ns]') @@ -255,34 +266,34 @@ cpdef inline object precision_from_unit(object unit): int p if unit == 'Y': - m = 1000000000L * 31556952 + m = 1000000000 * 31556952 p = 9 elif unit == 'M': - m = 1000000000L * 2629746 + m = 1000000000 * 2629746 p = 9 elif unit == 'W': - m = 1000000000L * DAY_SECONDS * 7 + m = 1000000000 * DAY_SECONDS * 7 p = 9 elif unit == 'D' or unit == 'd': - m = 1000000000L * DAY_SECONDS + m = 1000000000 * DAY_SECONDS p = 9 elif unit == 'h': - m = 1000000000L * 3600 + m = 1000000000 * 3600 p = 9 elif unit == 'm': - m = 1000000000L * 60 + m = 1000000000 * 60 p = 9 elif unit == 's': - m = 1000000000L + m = 1000000000 p = 9 elif unit == 'ms': - m = 1000000L + m = 1000000 p = 6 elif unit == 'us': - m = 1000L + m = 1000 p = 3 elif unit == 'ns' or unit is None: - m = 1L + m = 1 p = 0 else: raise ValueError(f"cannot cast unit {unit}") @@ -383,13 +394,13 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: if len(number): if current_unit is None: current_unit = 'h' - m = 1000000000L * 3600 + m = 1000000000 * 3600 elif current_unit == 'h': current_unit = 'm' - m = 1000000000L * 60 + m = 1000000000 * 60 elif current_unit == 'm': current_unit = 's' - m = 1000000000L + m = 1000000000 r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) have_hhmmss = 1 @@ -408,7 +419,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: # hh:mm:ss (so current_unit is 'm') if current_unit != 'm': raise ValueError("expected hh:mm:ss format before .") - m = 1000000000L + m = 1000000000 r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) have_value = 1 @@ -437,9 +448,9 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: raise ValueError("no units specified") if len(frac) > 0 and len(frac) <= 3: - m = 10**(3 -len(frac)) * 1000L * 1000L + m = 10**(3 -len(frac)) * 1000 * 1000 elif len(frac) > 3 and len(frac) <= 6: - m = 10**(6 -len(frac)) * 1000L + m = 10**(6 -len(frac)) * 1000 else: m = 10**(9 -len(frac)) @@ -451,7 +462,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: elif current_unit is not None: if current_unit != 'm': raise ValueError("expected hh:mm:ss format") - m = 1000000000L + m = 1000000000 r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) @@ -1018,6 +1029,7 @@ cdef class _Timedelta(timedelta): **Using string input** >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.nanoseconds 42 @@ -1095,7 +1107,7 @@ cdef class _Timedelta(timedelta): Returns ------- - formatted : str + str See Also -------- @@ -1115,6 +1127,7 @@ cdef class _Timedelta(timedelta): -------- >>> td = pd.Timedelta(days=6, minutes=50, seconds=3, ... milliseconds=10, microseconds=10, nanoseconds=12) + >>> td.isoformat() 'P6DT0H50M3.010010012S' >>> pd.Timedelta(hours=1, seconds=10).isoformat() @@ -1190,10 +1203,12 @@ class Timedelta(_Timedelta): value = nano + convert_to_timedelta64(timedelta(**kwargs), 'ns') except TypeError as e: - raise ValueError("cannot construct a Timedelta from the " - "passed arguments, allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]") + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + "allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]" + ) if unit in {'Y', 'y', 'M'}: raise ValueError( @@ -1230,8 +1245,9 @@ class Timedelta(_Timedelta): return NaT else: raise ValueError( - f"Value must be Timedelta, string, integer, " - f"float, timedelta or convertible, not {type(value).__name__}") + "Value must be Timedelta, string, integer, " + f"float, timedelta or convertible, not {type(value).__name__}" + ) if is_timedelta64_object(value): value = value.view('i8') @@ -1509,10 +1525,13 @@ cdef _rfloordiv(int64_t value, right): return right // value -cdef _broadcast_floordiv_td64(int64_t value, object other, - object (*operation)(int64_t value, - object right)): - """Boilerplate code shared by Timedelta.__floordiv__ and +cdef _broadcast_floordiv_td64( + int64_t value, + object other, + object (*operation)(int64_t value, object right) +): + """ + Boilerplate code shared by Timedelta.__floordiv__ and Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. Parameters diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 35ee87e714fa8..0ec3e2ad467e1 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,9 +2,11 @@ from datetime import timezone # dateutil compat from dateutil.tz import ( - tzutc as _dateutil_tzutc, + tzfile as _dateutil_tzfile, tzlocal as _dateutil_tzlocal, - tzfile as _dateutil_tzfile) + tzutc as _dateutil_tzutc, +) + from dateutil.tz import gettz as dateutil_gettz @@ -103,7 +105,9 @@ cpdef inline object maybe_get_tz(object tz): def _p_tz_cache_key(tz): - """ Python interface for cache function to facilitate testing.""" + """ + Python interface for cache function to facilitate testing. + """ return tz_cache_key(tz) @@ -120,7 +124,7 @@ cdef inline object tz_cache_key(object tz): dateutil timezones. Notes - ===== + ----- This cannot just be the hash of a timezone object. Unfortunately, the hashes of two dateutil tz objects which represent the same timezone are not equal (even though the tz objects will compare equal and represent @@ -196,7 +200,7 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo): arr = np.empty(sz, dtype='i8') for i in range(sz): - arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 + arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000 return arr @@ -217,7 +221,7 @@ cdef object get_dst_info(object tz): if cache_key is None: # e.g. pytz.FixedOffset, matplotlib.dates._UTC, # psycopg2.tz.FixedOffsetTimezone - num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 + num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None) @@ -313,7 +317,7 @@ cpdef bint tz_compare(object start, object end): Returns: ------- - compare : bint + bool """ # GH 18523 diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 73201e75c3c88..9e95dea979577 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -15,8 +15,13 @@ ctypedef fused pandas_string: @cython.boundscheck(False) @cython.wraparound(False) -def write_csv_rows(list data, ndarray data_index, - Py_ssize_t nlevels, ndarray cols, object writer): +def write_csv_rows( + list data, + ndarray data_index, + Py_ssize_t nlevels, + ndarray cols, + object writer +): """ Write the given data to the writer object, pre-allocating where possible for performance improvements. @@ -114,7 +119,9 @@ def convert_json_to_lines(arr: object) -> str: @cython.boundscheck(False) @cython.wraparound(False) def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: - """ return the maximum size of elements in a 1-dim string array """ + """ + Return the maximum size of elements in a 1-dim string array. + """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] pandas_string val @@ -130,7 +137,9 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: cpdef inline Py_ssize_t word_len(object val): - """ return the maximum length of a string or bytes value """ + """ + Return the maximum length of a string or bytes value. + """ cdef: Py_ssize_t l = 0 @@ -148,8 +157,10 @@ cpdef inline Py_ssize_t word_len(object val): @cython.boundscheck(False) @cython.wraparound(False) def string_array_replace_from_nan_rep( - ndarray[object, ndim=1] arr, object nan_rep, - object replace=None): + ndarray[object, ndim=1] arr, + object nan_rep, + object replace=np.nan +): """ Replace the values in the array with 'replacement' if they are 'nan_rep'. Return the same array. @@ -157,9 +168,6 @@ def string_array_replace_from_nan_rep( cdef: Py_ssize_t length = len(arr), i = 0 - if replace is None: - replace = np.nan - for i in range(length): if arr[i] == nan_rep: arr[i] = replace