From 02aed6cac37a41174a59c326c1c4a1cca95a186e Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sat, 22 Feb 2020 14:06:19 +0200
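Subject: [PATCH] CLN: Clean up docstrings and formatting in pandas/_libs

Normalize docstring sections to numpydoc conventions (Examples/Notes
headers, unnamed return values, bool/int/str type names), reflow long
signatures, literals and error messages, sort the dateutil.tz imports,
and drop the `L` suffix from integer literals in tslibs/timedeltas.pyx.

Not purely cosmetic:
- writers.string_array_replace_from_nan_rep now declares
  `replace=np.nan` as its default and no longer maps `replace=None` to
  np.nan, so an explicit `replace=None` writes None into the array.
- The ValueError message raised by array_to_timedelta64 for an invalid
  `errors` argument changes (the set of allowed values is now wrapped
  in balanced braces).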

---
 pandas/_libs/indexing.pyx          |   3 +-
 pandas/_libs/sparse.pyx            |  13 +--
 pandas/_libs/tslibs/conversion.pyx |   5 +-
 pandas/_libs/tslibs/resolution.pyx |  41 +++----
 pandas/_libs/tslibs/timedeltas.pyx | 177 ++++++++++++++++-------------
 pandas/_libs/tslibs/timezones.pyx  |  18 +--
 pandas/_libs/writers.pyx           |  26 +++--
 7 files changed, 154 insertions(+), 129 deletions(-)

diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx
index cdccdb504571c..316943edee124 100644
--- a/pandas/_libs/indexing.pyx
+++ b/pandas/_libs/indexing.pyx
@@ -1,7 +1,6 @@
 cdef class _NDFrameIndexerBase:
     """
-    A base class for _NDFrameIndexer for fast instantiation and attribute
-    access.
+    A base class for _NDFrameIndexer for fast instantiation and attribute access.
     """
     cdef public object obj, name, _ndim
 
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 3a6dd506b2428..50f220af0f5bc 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -188,8 +188,7 @@ cdef class IntIndex(SparseIndex):
             return -1
 
     @cython.wraparound(False)
-    cpdef ndarray[int32_t] lookup_array(self, ndarray[
-            int32_t, ndim=1] indexer):
+    cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
         """
         Vectorized lookup, returns ndarray[int32_t]
         """
@@ -424,12 +423,9 @@ cdef class BlockIndex(SparseIndex):
         """
         Intersect two BlockIndex objects
 
-        Parameters
-        ----------
-
         Returns
         -------
-        intersection : BlockIndex
+        BlockIndex
         """
         cdef:
             BlockIndex y
@@ -518,7 +514,7 @@ cdef class BlockIndex(SparseIndex):
 
         Returns
         -------
-        union : BlockIndex
+        BlockIndex
         """
         return BlockUnion(self, y.to_block_index()).result
 
@@ -548,8 +544,7 @@ cdef class BlockIndex(SparseIndex):
         return -1
 
     @cython.wraparound(False)
-    cpdef ndarray[int32_t] lookup_array(self, ndarray[
-            int32_t, ndim=1] indexer):
+    cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
         """
         Vectorized lookup, returns ndarray[int32_t]
         """
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 57b4100fbceb0..6e978d495c325 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -84,12 +84,11 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True):
     Parameters
     ----------
     arr : ndarray
-    copy : boolean, default True
+    copy : bool, default True
 
     Returns
     -------
-    result : ndarray with dtype datetime64[ns]
-
+    ndarray with dtype datetime64[ns]
     """
     cdef:
         Py_ssize_t i, n = arr.size
diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx
index 1e0eb7f97ec54..ecf31c15bb72c 100644
--- a/pandas/_libs/tslibs/resolution.pyx
+++ b/pandas/_libs/tslibs/resolution.pyx
@@ -110,8 +110,8 @@ def get_freq_group(freq) -> int:
     """
     Return frequency code group of given frequency str or offset.
 
-    Example
-    -------
+    Examples
+    --------
     >>> get_freq_group('W-MON')
     4000
 
@@ -193,8 +193,8 @@ class Resolution:
         """
         Return resolution str against resolution code.
 
-        Example
-        -------
+        Examples
+        --------
         >>> Resolution.get_str(Resolution.RESO_SEC)
         'second'
         """
@@ -205,8 +205,8 @@ class Resolution:
         """
         Return resolution str against resolution code.
 
-        Example
-        -------
+        Examples
+        --------
         >>> Resolution.get_reso('second')
         2
 
@@ -220,8 +220,8 @@ class Resolution:
         """
         Return frequency str against resolution str.
 
-        Example
-        -------
+        Examples
+        --------
         >>> f.Resolution.get_freq_group('day')
         4000
         """
@@ -232,8 +232,8 @@ class Resolution:
         """
         Return frequency str against resolution str.
 
-        Example
-        -------
+        Examples
+        --------
         >>> f.Resolution.get_freq('day')
         'D'
         """
@@ -244,8 +244,8 @@ class Resolution:
         """
         Return resolution str against frequency str.
 
-        Example
-        -------
+        Examples
+        --------
         >>> Resolution.get_str_from_freq('H')
         'hour'
         """
@@ -256,8 +256,8 @@ class Resolution:
         """
         Return resolution code against frequency str.
 
-        Example
-        -------
+        Examples
+        --------
         >>> Resolution.get_reso_from_freq('H')
         4
 
@@ -273,8 +273,8 @@ class Resolution:
 
         Parameters
         ----------
-        value : integer or float
-        freq : string
+        value : int or float
+        freq : str
             Frequency string
 
         Raises
@@ -282,8 +282,8 @@ class Resolution:
         ValueError
             If the float cannot be converted to an integer at any resolution.
 
-        Example
-        -------
+        Examples
+        --------
         >>> Resolution.get_stride_from_decimal(1.5, 'T')
         (90, 'S')
 
@@ -298,8 +298,9 @@ class Resolution:
         else:
             start_reso = cls.get_reso_from_freq(freq)
             if start_reso == 0:
-                raise ValueError("Could not convert to integer offset "
-                                 "at any resolution")
+                raise ValueError(
+                    "Could not convert to integer offset at any resolution"
+                )
 
             next_value = cls._reso_mult_map[start_reso] * value
             next_name = cls._reso_str_bump_map[freq]
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 3742506a7f8af..66660c5f641fd 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -37,51 +37,61 @@ from pandas._libs.tslibs.offsets import _Tick as Tick
 # Constants
 
 # components named tuple
-Components = collections.namedtuple('Components', [
-    'days', 'hours', 'minutes', 'seconds',
-    'milliseconds', 'microseconds', 'nanoseconds'])
-
-
-cdef dict timedelta_abbrevs = { 'Y': 'Y',
-                                'y': 'Y',
-                                'M': 'M',
-                                'W': 'W',
-                                'w': 'W',
-                                'D': 'D',
-                                'd': 'D',
-                                'days': 'D',
-                                'day': 'D',
-                                'hours': 'h',
-                                'hour': 'h',
-                                'hr': 'h',
-                                'h': 'h',
-                                'm': 'm',
-                                'minute': 'm',
-                                'min': 'm',
-                                'minutes': 'm',
-                                't': 'm',
-                                's': 's',
-                                'seconds': 's',
-                                'sec': 's',
-                                'second': 's',
-                                'ms': 'ms',
-                                'milliseconds': 'ms',
-                                'millisecond': 'ms',
-                                'milli': 'ms',
-                                'millis': 'ms',
-                                'l': 'ms',
-                                'us': 'us',
-                                'microseconds': 'us',
-                                'microsecond': 'us',
-                                'micro': 'us',
-                                'micros': 'us',
-                                'u': 'us',
-                                'ns': 'ns',
-                                'nanoseconds': 'ns',
-                                'nano': 'ns',
-                                'nanos': 'ns',
-                                'nanosecond': 'ns',
-                                'n': 'ns'}
+Components = collections.namedtuple(
+    "Components",
+    [
+        "days",
+        "hours",
+        "minutes",
+        "seconds",
+        "milliseconds",
+        "microseconds",
+        "nanoseconds",
+    ],
+)
+
+cdef dict timedelta_abbrevs = {
+    "Y": "Y",
+    "y": "Y",
+    "M": "M",
+    "W": "W",
+    "w": "W",
+    "D": "D",
+    "d": "D",
+    "days": "D",
+    "day": "D",
+    "hours": "h",
+    "hour": "h",
+    "hr": "h",
+    "h": "h",
+    "m": "m",
+    "minute": "m",
+    "min": "m",
+    "minutes": "m",
+    "t": "m",
+    "s": "s",
+    "seconds": "s",
+    "sec": "s",
+    "second": "s",
+    "ms": "ms",
+    "milliseconds": "ms",
+    "millisecond": "ms",
+    "milli": "ms",
+    "millis": "ms",
+    "l": "ms",
+    "us": "us",
+    "microseconds": "us",
+    "microsecond": "us",
+    "micro": "us",
+    "micros": "us",
+    "u": "us",
+    "ns": "ns",
+    "nanoseconds": "ns",
+    "nano": "ns",
+    "nanos": "ns",
+    "nanosecond": "ns",
+    "n": "ns",
+}
 
 _no_input = object()
 
@@ -137,9 +147,11 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
     if is_integer_object(delta):
         return delta
     if PyDelta_Check(delta):
-        return (delta.days * 24 * 60 * 60 * 1000000 +
-                delta.seconds * 1000000 +
-                delta.microseconds) * 1000
+        return (
+            delta.days * 24 * 60 * 60 * 1_000_000
+            + delta.seconds * 1_000_000
+            + delta.microseconds
+        ) * 1000
 
     raise TypeError(type(delta))
 
@@ -212,9 +224,8 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
         Py_ssize_t i, n
         int64_t[:] iresult
 
-    if errors not in ('ignore', 'raise', 'coerce'):
-        raise ValueError("errors must be one of 'ignore', "
-                         "'raise', or 'coerce'}")
+    if errors not in {'ignore', 'raise', 'coerce'}:
+        raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")
 
     n = values.shape[0]
     result = np.empty(n, dtype='m8[ns]')
@@ -255,34 +266,34 @@ cpdef inline object precision_from_unit(object unit):
         int p
 
     if unit == 'Y':
-        m = 1000000000L * 31556952
+        m = 1000000000 * 31556952
         p = 9
     elif unit == 'M':
-        m = 1000000000L * 2629746
+        m = 1000000000 * 2629746
         p = 9
     elif unit == 'W':
-        m = 1000000000L * DAY_SECONDS * 7
+        m = 1000000000 * DAY_SECONDS * 7
         p = 9
     elif unit == 'D' or unit == 'd':
-        m = 1000000000L * DAY_SECONDS
+        m = 1000000000 * DAY_SECONDS
         p = 9
     elif unit == 'h':
-        m = 1000000000L * 3600
+        m = 1000000000 * 3600
         p = 9
     elif unit == 'm':
-        m = 1000000000L * 60
+        m = 1000000000 * 60
         p = 9
     elif unit == 's':
-        m = 1000000000L
+        m = 1000000000
         p = 9
     elif unit == 'ms':
-        m = 1000000L
+        m = 1000000
         p = 6
     elif unit == 'us':
-        m = 1000L
+        m = 1000
         p = 3
     elif unit == 'ns' or unit is None:
-        m = 1L
+        m = 1
         p = 0
     else:
         raise ValueError(f"cannot cast unit {unit}")
@@ -383,13 +394,13 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1:
             if len(number):
                 if current_unit is None:
                     current_unit = 'h'
-                    m = 1000000000L * 3600
+                    m = 1000000000 * 3600
                 elif current_unit == 'h':
                     current_unit = 'm'
-                    m = 1000000000L * 60
+                    m = 1000000000 * 60
                 elif current_unit == 'm':
                     current_unit = 's'
-                    m = 1000000000L
+                    m = 1000000000
                 r = <int64_t>int(''.join(number)) * m
                 result += timedelta_as_neg(r, neg)
                 have_hhmmss = 1
@@ -408,7 +419,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1:
                 # hh:mm:ss (so current_unit is 'm')
                 if current_unit != 'm':
                     raise ValueError("expected hh:mm:ss format before .")
-                m = 1000000000L
+                m = 1000000000
                 r = <int64_t>int(''.join(number)) * m
                 result += timedelta_as_neg(r, neg)
                 have_value = 1
@@ -437,9 +448,9 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1:
             raise ValueError("no units specified")
 
         if len(frac) > 0 and len(frac) <= 3:
-            m = 10**(3 -len(frac)) * 1000L * 1000L
+            m = 10**(3 -len(frac)) * 1000 * 1000
         elif len(frac) > 3 and len(frac) <= 6:
-            m = 10**(6 -len(frac)) * 1000L
+            m = 10**(6 -len(frac)) * 1000
         else:
             m = 10**(9 -len(frac))
 
@@ -451,7 +462,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1:
     elif current_unit is not None:
         if current_unit != 'm':
             raise ValueError("expected hh:mm:ss format")
-        m = 1000000000L
+        m = 1000000000
         r = <int64_t>int(''.join(number)) * m
         result += timedelta_as_neg(r, neg)
 
@@ -1018,6 +1029,7 @@ cdef class _Timedelta(timedelta):
         **Using string input**
 
         >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
+
         >>> td.nanoseconds
         42
 
@@ -1095,7 +1107,7 @@ cdef class _Timedelta(timedelta):
 
         Returns
         -------
-        formatted : str
+        str
 
         See Also
         --------
@@ -1115,6 +1127,7 @@ cdef class _Timedelta(timedelta):
         --------
         >>> td = pd.Timedelta(days=6, minutes=50, seconds=3,
         ...                   milliseconds=10, microseconds=10, nanoseconds=12)
+
         >>> td.isoformat()
         'P6DT0H50M3.010010012S'
         >>> pd.Timedelta(hours=1, seconds=10).isoformat()
@@ -1190,10 +1203,12 @@ class Timedelta(_Timedelta):
                 value = nano + convert_to_timedelta64(timedelta(**kwargs),
                                                       'ns')
             except TypeError as e:
-                raise ValueError("cannot construct a Timedelta from the "
-                                 "passed arguments, allowed keywords are "
-                                 "[weeks, days, hours, minutes, seconds, "
-                                 "milliseconds, microseconds, nanoseconds]")
+                raise ValueError(
+                    "cannot construct a Timedelta from the passed arguments, "
+                    "allowed keywords are "
+                    "[weeks, days, hours, minutes, seconds, "
+                    "milliseconds, microseconds, nanoseconds]"
+                )
 
         if unit in {'Y', 'y', 'M'}:
             raise ValueError(
@@ -1230,8 +1245,9 @@ class Timedelta(_Timedelta):
             return NaT
         else:
             raise ValueError(
-                f"Value must be Timedelta, string, integer, "
-                f"float, timedelta or convertible, not {type(value).__name__}")
+                "Value must be Timedelta, string, integer, "
+                f"float, timedelta or convertible, not {type(value).__name__}"
+            )
 
         if is_timedelta64_object(value):
             value = value.view('i8')
@@ -1509,10 +1525,13 @@ cdef _rfloordiv(int64_t value, right):
     return right // value
 
 
-cdef _broadcast_floordiv_td64(int64_t value, object other,
-                              object (*operation)(int64_t value,
-                                                  object right)):
-    """Boilerplate code shared by Timedelta.__floordiv__ and
+cdef _broadcast_floordiv_td64(
+    int64_t value,
+    object other,
+    object (*operation)(int64_t value, object right)
+):
+    """
+    Boilerplate code shared by Timedelta.__floordiv__ and
     Timedelta.__rfloordiv__ because np.timedelta64 does not implement these.
 
     Parameters
diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx
index 35ee87e714fa8..0ec3e2ad467e1 100644
--- a/pandas/_libs/tslibs/timezones.pyx
+++ b/pandas/_libs/tslibs/timezones.pyx
@@ -2,9 +2,11 @@ from datetime import timezone
 
 # dateutil compat
 from dateutil.tz import (
-    tzutc as _dateutil_tzutc,
+    tzfile as _dateutil_tzfile,
     tzlocal as _dateutil_tzlocal,
-    tzfile as _dateutil_tzfile)
+    tzutc as _dateutil_tzutc,
+)
+
 
 from dateutil.tz import gettz as dateutil_gettz
 
@@ -103,7 +105,9 @@ cpdef inline object maybe_get_tz(object tz):
 
 
 def _p_tz_cache_key(tz):
-    """ Python interface for cache function to facilitate testing."""
+    """
+    Python interface for cache function to facilitate testing.
+    """
     return tz_cache_key(tz)
 
 
@@ -120,7 +124,7 @@ cdef inline object tz_cache_key(object tz):
     dateutil timezones.
 
     Notes
-    =====
+    -----
     This cannot just be the hash of a timezone object. Unfortunately, the
     hashes of two dateutil tz objects which represent the same timezone are
     not equal (even though the tz objects will compare equal and represent
@@ -196,7 +200,7 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo):
     arr = np.empty(sz, dtype='i8')
 
     for i in range(sz):
-        arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000
+        arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000
 
     return arr
 
@@ -217,7 +221,7 @@ cdef object get_dst_info(object tz):
     if cache_key is None:
         # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
         # psycopg2.tz.FixedOffsetTimezone
-        num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000
+        num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
         return (np.array([NPY_NAT + 1], dtype=np.int64),
                 np.array([num], dtype=np.int64),
                 None)
@@ -313,7 +317,7 @@ cpdef bint tz_compare(object start, object end):
 
     Returns:
     -------
-    compare : bint
+    bool
 
     """
     # GH 18523
diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx
index 73201e75c3c88..9e95dea979577 100644
--- a/pandas/_libs/writers.pyx
+++ b/pandas/_libs/writers.pyx
@@ -15,8 +15,13 @@ ctypedef fused pandas_string:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def write_csv_rows(list data, ndarray data_index,
-                   Py_ssize_t nlevels, ndarray cols, object writer):
+def write_csv_rows(
+    list data,
+    ndarray data_index,
+    Py_ssize_t nlevels,
+    ndarray cols,
+    object writer
+):
     """
     Write the given data to the writer object, pre-allocating where possible
     for performance improvements.
@@ -114,7 +119,9 @@ def convert_json_to_lines(arr: object) -> str:
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t:
-    """ return the maximum size of elements in a 1-dim string array """
+    """
+    Return the maximum size of elements in a 1-dim string array.
+    """
     cdef:
         Py_ssize_t i, m = 0, l = 0, length = arr.shape[0]
         pandas_string val
@@ -130,7 +137,9 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t:
 
 
 cpdef inline Py_ssize_t word_len(object val):
-    """ return the maximum length of a string or bytes value """
+    """
+    Return the maximum length of a string or bytes value.
+    """
     cdef:
         Py_ssize_t l = 0
 
@@ -148,8 +157,10 @@ cpdef inline Py_ssize_t word_len(object val):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def string_array_replace_from_nan_rep(
-        ndarray[object, ndim=1] arr, object nan_rep,
-        object replace=None):
+    ndarray[object, ndim=1] arr,
+    object nan_rep,
+    object replace=np.nan
+):
     """
     Replace the values in the array with 'replacement' if
     they are 'nan_rep'. Return the same array.
@@ -157,9 +168,6 @@ def string_array_replace_from_nan_rep(
     cdef:
         Py_ssize_t length = len(arr), i = 0
 
-    if replace is None:
-        replace = np.nan
-
     for i in range(length):
         if arr[i] == nan_rep:
             arr[i] = replace