harshit-py · harshit-py · Jun 28, 2019 · Jun 19, 2019 · Jun 20, 2019 · Jun 20, 2019
diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py
@@ -55,7 +55,14 @@ class SeriesConstructors:
               [False, True],
               ['float', 'int']]
 
+    # Generators get exhausted on use, so run setup before every call
+    number = 1
+    repeat = (3, 250, 10)
+
     def setup(self, data_fmt, with_index, dtype):
+        if data_fmt in (gen_of_str, gen_of_tuples) and with_index:
+            raise NotImplementedError('Series constructors do not support '
+                                      'using generators with indexes')
         N = 10**4
         if dtype == 'float':
             arr = np.random.randn(N)

diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
@@ -72,6 +72,10 @@ class FromRecords:
     params = [None, 1000]
     param_names = ['nrows']
 
+    # Generators get exhausted on use, so run setup before every call
+    number = 1
+    repeat = (3, 250, 10)
+
     def setup(self, nrows):
         N = 100000
         self.gen = ((x, (x * 20), (x * 100)) for x in range(N))

diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py
@@ -9,7 +9,7 @@
     pass
 
 hcal = pd.tseries.holiday.USFederalHolidayCalendar()
-# These offests currently raise a NotImplimentedError with .apply_index()
+# These offsets currently raise a NotImplimentedError with .apply_index()
 non_apply = [pd.offsets.Day(),
              pd.offsets.BYearEnd(),
              pd.offsets.BYearBegin(),

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -256,8 +256,8 @@ fi
 ### DOCSTRINGS ###
 if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 
-    MSG='Validate docstrings (GL03, GL06, GL07, GL09, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA05)' ; echo $MSG
-    $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL06,GL07,GL09,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA05
+    MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA05)' ; echo $MSG
+    $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA05
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi

diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst
@@ -660,7 +660,7 @@ example, to subtract the mean for each observation by smoker group.
    run;
 
 
-pandas ``groubpy`` provides a ``transform`` mechanism that allows
+pandas ``groupby`` provides a ``transform`` mechanism that allows
 these type of operations to be succinctly expressed in one
 operation.
 

diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst
@@ -634,7 +634,7 @@ For example, to subtract the mean for each observation by smoker group.
    generate adj_total_bill = total_bill - group_bill
 
 
-pandas ``groubpy`` provides a ``transform`` mechanism that allows
+pandas ``groupby`` provides a ``transform`` mechanism that allows
 these type of operations to be succinctly expressed in one
 operation.
 

diff --git a/doc/source/install.rst b/doc/source/install.rst
@@ -286,6 +286,7 @@ psycopg2                                     PostgreSQL engine for sqlalchemy
 pyarrow                   0.9.0              Parquet and feather reading / writing
 pymysql                                      MySQL engine for sqlalchemy
 pyreadstat                                   SPSS files (.sav) reading
+pytables                  3.4.2              HDF5 reading / writing
 qtpy                                         Clipboard I/O
 s3fs                      0.0.8              Amazon S3 access
 xarray                    0.8.2              pandas-like API for N-dimensional data

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -488,7 +488,7 @@ specification:
 
 .. versionadded:: 0.21.0
 
-Specifying ``dtype='cateogry'`` will result in an unordered ``Categorical``
+Specifying ``dtype='category'`` will result in an unordered ``Categorical``
 whose ``categories`` are the unique values observed in the data. For more
 control on the categories and order, create a
 :class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for
@@ -1679,7 +1679,7 @@ S3 URLs are handled as well but require installing the `S3Fs
 
    df = pd.read_csv('s3://pandas-test/tips.csv')
 
-If your S3 bucket requires cedentials you will need to set them as environment
+If your S3 bucket requires credentials you will need to set them as environment
 variables or in the ``~/.aws/credentials`` config file, refer to the `S3Fs
 documentation on credentials
 <https://s3fs.readthedocs.io/en/latest/#credentials>`_.
@@ -2078,7 +2078,7 @@ Dates written in nanoseconds need to be read back in nanoseconds:
 
    json = dfj2.to_json(date_unit='ns')
 
-   # Try to parse timestamps as millseconds -> Won't Work
+   # Try to parse timestamps as milliseconds -> Won't Work
    dfju = pd.read_json(json, date_unit='ms')
    dfju
 

diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst
@@ -431,6 +431,12 @@ compute.use_bottleneck                  True         Use the bottleneck library
                                                      computation if it is installed.
 compute.use_numexpr                     True         Use the numexpr library to accelerate
                                                      computation if it is installed.
+plotting.backend                        matplotlib   Change the plotting backend to a different
+                                                     backend than the current matplotlib one.
+                                                     Backends can be implemented as third-party
+                                                     libraries implementing the pandas plotting
+                                                     API. They can use other plotting libraries
+                                                     like Bokeh, Altair, etc.
 plotting.matplotlib.register_converters True         Register custom converters with
                                                      matplotlib. Set to False to de-register.
 ======================================= ============ ==================================

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -1133,7 +1133,7 @@ Valid business hours are distinguished by whether it started from valid ``Busine
     pd.Timestamp('2014-08-01 17:00') + bh
     pd.Timestamp('2014-08-01 23:00') + bh
 
-    # Although 2014-08-02 is Satuaday,
+    # Although 2014-08-02 is Saturday,
     # it is valid because it starts from 08-01 (Friday).
     pd.Timestamp('2014-08-02 04:00') + bh
 

diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst
@@ -170,7 +170,7 @@ combined result, by using ``where`` on a selector table.
                             df_mt, selector='df1_mt')
    store
 
-   # indiviual tables were created
+   # individual tables were created
    store.select('df1_mt')
    store.select('df2_mt')
 

diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst
@@ -816,7 +816,7 @@ Enhancements
 - Implemented ``Panel.pct_change`` (:issue:`6904`)
 - Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max,
   :func:`rolling_min` defaults to min, and all others default to mean (:issue:`6297`)
-- ``CustomBuisnessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`)
+- ``CustomBusinessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`)
 - :meth:`Series.quantile` and :meth:`DataFrame.quantile` now accept an array of
   quantiles.
 - :meth:`~DataFrame.describe` now accepts an array of percentiles to include in the summary statistics (:issue:`4196`)

diff --git a/doc/source/whatsnew/v0.14.1.rst b/doc/source/whatsnew/v0.14.1.rst
@@ -247,7 +247,7 @@ Bug Fixes
 - Bug in ``DatetimeIndex`` comparison doesn't handle ``NaT`` properly (:issue:`7529`)
 - Bug in passing input with ``tzinfo`` to some offsets ``apply``, ``rollforward`` or ``rollback`` resets ``tzinfo`` or raises ``ValueError`` (:issue:`7465`)
 - Bug in ``DatetimeIndex.to_period``, ``PeriodIndex.asobject``, ``PeriodIndex.to_timestamp`` doesn't preserve ``name`` (:issue:`7485`)
-- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestanp`` handle ``NaT`` incorrectly (:issue:`7228`)
+- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` handle ``NaT`` incorrectly (:issue:`7228`)
 - Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may return normal ``datetime`` (:issue:`7502`)
 - Bug in ``resample`` raises ``ValueError`` when target contains ``NaT`` (:issue:`7227`)
 - Bug in ``Timestamp.tz_localize`` resets ``nanosecond`` info (:issue:`7534`)

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -1513,7 +1513,7 @@ Bug Fixes
 - Bug in ``Series`` comparison may output incorrect result if rhs contains ``NaT`` (:issue:`9005`)
 - Bug in ``Series`` and ``Index`` comparison may output incorrect result if it contains ``NaT`` with ``object`` dtype (:issue:`13592`)
 - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
-- Bug in ``Peirod`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
+- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
 - Bug in ``pd.set_eng_float_format()`` that would prevent NaN and Inf from formatting (:issue:`11981`)
 - Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
 - Clean some compile time warnings in datetime parsing (:issue:`13607`)

diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst
@@ -263,7 +263,7 @@ Now, to find prices per store/product, we can simply do:
 See the :ref:`documentation <groupby.pipe>` for more.
 
 
-.. _whatsnew_0210.enhancements.reanme_categories:
+.. _whatsnew_0210.enhancements.rename_categories:
 
 ``Categorical.rename_categories`` accepts a dict-like
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -418,7 +418,7 @@ New Behavior, without regard to the bottleneck installation:
 
    s.sum()
 
-Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottlenck`` installation:
+Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation:
 
 .. code-block:: ipython
 

diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst
@@ -276,7 +276,7 @@ To show only observed values:
 
    df.groupby(['A', 'B', 'C'], observed=True).count()
 
-For pivotting operations, this behavior is *already* controlled by the ``dropna`` keyword:
+For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword:
 
 .. ipython:: python
 

diff --git a/doc/source/whatsnew/v0.23.1.rst b/doc/source/whatsnew/v0.23.1.rst
@@ -26,7 +26,7 @@ Fixed Regressions
 **Comparing Series with datetime.date**
 
 We've reverted a 0.23.0 change to comparing a :class:`Series` holding datetimes and a ``datetime.date`` object (:issue:`21152`).
-In pandas 0.22 and earlier, comparing a Series holding datetimes and ``datetime.date`` objects would coerce the ``datetime.date`` to a datetime before comapring.
+In pandas 0.22 and earlier, comparing a Series holding datetimes and ``datetime.date`` objects would coerce the ``datetime.date`` to a datetime before comparing.
 This was inconsistent with Python, NumPy, and :class:`DatetimeIndex`, which never consider a datetime and ``datetime.date`` equal.
 
 In 0.23.0, we unified operations between DatetimeIndex and Series, and in the process changed comparisons between a Series of datetimes and ``datetime.date`` without warning.

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1061,7 +1061,7 @@ The affected cases are:
 
 .. code-block:: ipython
 
-   # Comparison operations and arithmetic opeartions both raise ValueError.
+   # Comparison operations and arithmetic operations both raise ValueError.
    In [6]: df == (1, 2, 3)
    ...
    ValueError: Unable to coerce to Series, length must be 2: given 3
@@ -1324,7 +1324,7 @@ Deprecations
 - :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`)
 - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`)
 - Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version.  Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`)
-- ``Series.cat.categorical``, ``Series.cat.name`` and ``Sersies.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`).
+- ``Series.cat.categorical``, ``Series.cat.name`` and ``Series.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`).
 - Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`Index`, :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`).
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:
@@ -1604,7 +1604,7 @@ Datetimelike
 - Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`)
 - Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`)
 - Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`)
-- Bug in :class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are undordered and have the same categories, but in a different order (:issue:`24142`)
+- Bug in :class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are unordered and have the same categories, but in a different order (:issue:`24142`)
 - Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`)
 - Bug in :class:`DatetimeIndex` where constructing a :class:`DatetimeIndex` from a :class:`Categorical` or :class:`CategoricalIndex` would incorrectly drop timezone information (:issue:`18664`)
 - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would incorrectly lose the index's ``freq`` attribute (:issue:`21282`)
@@ -1670,7 +1670,7 @@ Timezones
 Offsets
 ^^^^^^^
 
-- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`)
+- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
 - Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored.  Passing these will now raise ``ValueError`` (:issue:`19398`)
 - Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`)
 - Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`)
@@ -1838,7 +1838,7 @@ Groupby/Resample/Rolling
   ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`).
 - Multiple bugs in :func:`pandas.core.window.Rolling.min` with ``closed='left'`` and a
   datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
-- Bug in :meth:`pandas.core.resample.Resampler.apply` when passing postiional arguments to applied func (:issue:`14615`).
+- Bug in :meth:`pandas.core.resample.Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
 - Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
 - Bug in :meth:`pandas.core.resample.Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`).
 - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`)