Skip to content

DOC: Improved the docstring of Series.str.findall #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
96b8bb1
ENH: Implement DataFrame.astype('category') (#18099)
jschendel Mar 1, 2018
4a27697
Cythonized GroupBy any (#19722)
WillAyd Mar 1, 2018
52559f5
ENH: Allow Timestamp to accept Nanosecond argument (#19889)
mroeschke Mar 1, 2018
c8859b5
DOC: script to build single docstring page (#19840)
jorisvandenbossche Mar 1, 2018
3b4eb8d
CLN: remove redundant clean_fill_method calls (#19947)
jorisvandenbossche Mar 1, 2018
9958ce6
BUG: Preserve column metadata with DataFrame.astype (#19948)
jschendel Mar 1, 2018
c5a1ef1
DOC: remove empty attribute/method lists from class docstrings html p…
jorisvandenbossche Mar 1, 2018
9242248
BUG: DataFrame.diff(axis=0) with DatetimeTZ data (#19773)
mroeschke Mar 1, 2018
87fefe2
dispatch Series[datetime64] comparison ops to DatetimeIndex (#19800)
jbrockmendel Mar 1, 2018
d44a6ec
Making to_datetime('today') and Timestamp('today') consistent (#19937)
shangyian Mar 1, 2018
072545d
ENH: Add option to disable MathJax (#19824). (#19856)
davidchall Mar 1, 2018
5f271eb
BUG: Adding skipna as an option to groupby cumsum and cumprod (#19914)
shangyian Mar 1, 2018
d615f86
DOC: Adding script to validate docstrings, and generate list of all f…
datapythonista Mar 2, 2018
e6c7dea
ENH: Let initialisation from dicts use insertion order for python >= …
topper-123 Mar 2, 2018
b167483
DOC: update install.rst to include ActivePython distribution (#19908)
Dr-G Mar 2, 2018
a7a7f8c
DOC: clarify version of ActivePython that includes pandas (#19964)
jorisvandenbossche Mar 2, 2018
fe09b66
First try of my docstring
jcontesti Mar 2, 2018
a227404
DOC: Improved the docstring of Series.str.findall
jcontesti Mar 2, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
from .pandas_vb_common import setup # noqa


# Groupby methods that should be skipped for a given key dtype.
# GroupByMethods.setup raises NotImplementedError when the requested
# (dtype, method) pair appears here, which makes asv skip that benchmark
# (presumably because these reductions are not supported / not meaningful
# for object-dtype keys — confirm against pandas groupby support).
method_blacklist = {
    'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
               'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
               'var', 'mad', 'describe', 'std'}
}


class ApplyDictReturn(object):
goal_time = 0.2

Expand Down Expand Up @@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
def time_frame_nth(self, df):
    """Benchmark ``DataFrameGroupBy.nth`` without dropping missing rows."""
    grouped = df.groupby(0)
    grouped.nth(0)


def time_series_nth_any(self, df):
    """Benchmark ``SeriesGroupBy.nth`` with ``dropna='any'``."""
    values = df[1]
    keys = df[0]
    values.groupby(keys).nth(0, dropna='any')

Expand Down Expand Up @@ -369,23 +377,27 @@ class GroupByMethods(object):
goal_time = 0.2

param_names = ['dtype', 'method']
params = [['int', 'float'],
params = [['int', 'float', 'object'],
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]

def setup(self, dtype, method):
if method in method_blacklist.get(dtype, {}):
raise NotImplementedError # skip benchmark
ngroups = 1000
size = ngroups * 2
rng = np.arange(ngroups)
values = rng.take(np.random.randint(0, ngroups, size=size))
if dtype == 'int':
key = np.random.randint(0, size, size=size)
else:
elif dtype == 'float':
key = np.concatenate([np.random.random(ngroups) * 0.1,
np.random.random(ngroups) * 10.0])
elif dtype == 'object':
key = ['foo'] * size

df = DataFrame({'values': values, 'key': key})
self.df_groupby_method = getattr(df.groupby('key')['values'], method)
Expand Down
166 changes: 135 additions & 31 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,21 @@
import sys
import os
import shutil
import subprocess
# import subprocess
import argparse
from contextlib import contextmanager
import webbrowser
import jinja2

import pandas


DOC_PATH = os.path.dirname(os.path.abspath(__file__))
SOURCE_PATH = os.path.join(DOC_PATH, 'source')
BUILD_PATH = os.path.join(DOC_PATH, 'build')
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']


def _generate_index(include_api, single_doc=None):
    """Render ``source/index.rst`` from its jinja2 template.

    Parameters
    ----------
    include_api : bool
        Whether API documentation will be built.  Forced to ``False``
        when ``single_doc`` is given.
    single_doc : str or None
        If provided, only this single documentation page will be
        generated (the extension and any directory part are stripped).
    """
    if single_doc is not None:
        # Building one page implies skipping the (slow) API docs.
        include_api = False
        single_doc = os.path.splitext(os.path.basename(single_doc))[0]

    template_fname = os.path.join(SOURCE_PATH, 'index.rst.template')
    with open(template_fname) as template_file:
        template = jinja2.Template(template_file.read())

    rendered = template.render(include_api=include_api,
                               single_doc=single_doc)
    with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as index_file:
        index_file.write(rendered)


@contextmanager
def _maybe_exclude_notebooks():
"""Skip building the notebooks if pandoc is not installed.
Expand All @@ -58,6 +39,7 @@ def _maybe_exclude_notebooks():
1. nbconvert isn't installed, or
2. nbconvert is installed, but pandoc isn't
"""
# TODO move to exclude_pattern
base = os.path.dirname(__file__)
notebooks = [os.path.join(base, 'source', nb)
for nb in ['style.ipynb']]
Expand Down Expand Up @@ -96,8 +78,110 @@ class DocBuilder:
All public methods of this class can be called as parameters of the
script.
"""
def __init__(self, num_jobs=1):
def __init__(self, num_jobs=1, include_api=True, single_doc=None):
self.num_jobs = num_jobs
self.include_api = include_api
self.single_doc = None
self.single_doc_type = None
if single_doc is not None:
self._process_single_doc(single_doc)
self.exclude_patterns = self._exclude_patterns

self._generate_index()
if self.single_doc_type == 'docstring':
self._run_os('sphinx-autogen', '-o',
'source/generated_single', 'source/index.rst')

@property
def _exclude_patterns(self):
    """Docs source files that will be excluded from building.

    Returns a comma-separated string of repr-quoted glob patterns
    suitable for sphinx's ``-Dexclude_patterns`` option.
    """
    # TODO move maybe_exclude_notebooks here
    if self.single_doc is not None:
        # Keep only index.rst and the page being built; exclude every
        # other .rst/.ipynb source file.
        wanted = '{0}.rst'.format(self.single_doc)
        rst_files = [
            fname for fname in os.listdir(SOURCE_PATH)
            if fname.endswith(('.rst', '.ipynb'))
            and fname != 'index.rst'
            and fname != wanted
        ]
        if self.single_doc_type != 'api':
            rst_files += ['generated/*.rst']
    elif not self.include_api:
        rst_files = ['api.rst', 'generated/*.rst']
    else:
        rst_files = ['generated_single/*.rst']

    patterns = ['**.ipynb_checkpoints'] + rst_files
    return ','.join('{!r}'.format(pattern) for pattern in patterns)

def _process_single_doc(self, single_doc):
"""Extract self.single_doc (base name) and self.single_doc_type from
passed single_doc kwarg.

"""
self.include_api = False

if single_doc == 'api.rst':
self.single_doc_type = 'api'
self.single_doc = 'api'
elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
self.single_doc_type = 'rst'
self.single_doc = os.path.splitext(os.path.basename(single_doc))[0]
elif os.path.exists(
os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
self.single_doc_type = 'rst'
self.single_doc = single_doc
elif single_doc is not None:
try:
obj = pandas
for name in single_doc.split('.'):
obj = getattr(obj, name)
except AttributeError:
raise ValueError('Single document not understood, it should '
'be a file in doc/source/*.rst (e.g. '
'"contributing.rst" or a pandas function or '
'method (e.g. "pandas.DataFrame.head")')
else:
self.single_doc_type = 'docstring'
if single_doc.startswith('pandas.'):
self.single_doc = single_doc[len('pandas.'):]
else:
self.single_doc = single_doc

def _copy_generated_docstring(self):
    """Copy the existing generated (from api.rst) docstring page into
    ``generated_single``, because that page is more correct in certain
    cases (where a custom autodoc template is used).

    Best-effort: any failure to create the directory or copy the file
    is silently ignored (sphinx will simply regenerate the page).
    """
    fname = os.path.join(SOURCE_PATH, 'generated',
                         'pandas.{}.rst'.format(self.single_doc))
    temp_dir = os.path.join(SOURCE_PATH, 'generated_single')

    try:
        os.makedirs(temp_dir)
    except OSError:
        # Directory probably exists already; keep going either way.
        pass

    if os.path.exists(fname):
        try:
            # copying to make sure sphinx always thinks it is new
            # and needs to be re-generated (to pick source code changes)
            shutil.copy(fname, temp_dir)
        except OSError:
            # FIX: narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed;
            # copy failures remain best-effort.
            pass

def _generate_index(self):
    """Render ``source/index.rst`` from its jinja2 template, using the
    builder's ``include_api``/``single_doc`` configuration."""
    if self.single_doc_type == 'docstring':
        self._copy_generated_docstring()

    template_fname = os.path.join(SOURCE_PATH, 'index.rst.template')
    with open(template_fname) as template_file:
        template = jinja2.Template(template_file.read())

    rendered = template.render(include_api=self.include_api,
                               single_doc=self.single_doc,
                               single_doc_type=self.single_doc_type)
    with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as index_file:
        index_file.write(rendered)

@staticmethod
def _create_build_structure():
Expand All @@ -121,7 +205,10 @@ def _run_os(*args):
--------
>>> DocBuilder()._run_os('python', '--version')
"""
subprocess.check_call(args, stderr=subprocess.STDOUT)
# TODO check_call should be more safe, but it fails with
# exclude patterns, needs investigation
# subprocess.check_call(args, stderr=subprocess.STDOUT)
os.system(' '.join(args))

def _sphinx_build(self, kind):
"""Call sphinx to build documentation.
Expand All @@ -142,11 +229,21 @@ def _sphinx_build(self, kind):
self._run_os('sphinx-build',
'-j{}'.format(self.num_jobs),
'-b{}'.format(kind),
'-d{}'.format(os.path.join(BUILD_PATH,
'doctrees')),
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
SOURCE_PATH,
os.path.join(BUILD_PATH, kind))

def _open_browser(self):
    """Open the built HTML page for ``self.single_doc`` in a new
    browser tab."""
    html_root = os.path.join('file://', DOC_PATH, 'build', 'html')
    if self.single_doc_type == 'docstring':
        page = os.path.join('generated_single',
                            'pandas.{}.html'.format(self.single_doc))
    else:
        page = '{}.html'.format(self.single_doc)
    webbrowser.open(os.path.join(html_root, page), new=2)

def html(self):
"""Build HTML documentation."""
self._create_build_structure()
Expand All @@ -156,6 +253,11 @@ def html(self):
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc is not None:
self._open_browser()
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
ignore_errors=True)

def latex(self, force=False):
"""Build PDF documentation."""
self._create_build_structure()
Expand Down Expand Up @@ -222,8 +324,8 @@ def main():
metavar='FILENAME',
type=str,
default=None,
help=('filename of section to compile, '
'e.g. "indexing"'))
help=('filename of section or method name to '
'compile, e.g. "indexing", "DataFrame.join"'))
argparser.add_argument('--python-path',
type=str,
default=os.path.join(DOC_PATH, '..'),
Expand All @@ -235,8 +337,10 @@ def main():
args.command, ', '.join(cmds)))

os.environ['PYTHONPATH'] = args.python_path
_generate_index(not args.no_api, args.single)
getattr(DocBuilder(args.num_jobs), args.command)()

getattr(DocBuilder(args.num_jobs,
not args.no_api,
args.single), args.command)()


if __name__ == '__main__':
Expand Down
5 changes: 5 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2179,8 +2179,12 @@ Computations / Descriptive Stats
.. autosummary::
:toctree: generated/

GroupBy.all
GroupBy.any
GroupBy.bfill
GroupBy.count
GroupBy.cumcount
GroupBy.ffill
GroupBy.first
GroupBy.head
GroupBy.last
Expand All @@ -2192,6 +2196,7 @@ Computations / Descriptive Stats
GroupBy.nth
GroupBy.ohlc
GroupBy.prod
GroupBy.rank
GroupBy.size
GroupBy.sem
GroupBy.std
Expand Down
Loading