From fe09b6680e18065ccbd7769e39b56bd8b9927c86 Mon Sep 17 00:00:00 2001 From: jcontesti <25779507+jcontesti@users.noreply.github.com> Date: Fri, 2 Mar 2018 20:28:56 +0100 Subject: [PATCH 1/8] First try of my docstring --- pandas/core/strings.py | 59 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6b427ed1da834..fe083b31be03b 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -898,8 +898,10 @@ def str_join(arr, sep): def str_findall(arr, pat, flags=0): """ - Find all occurrences of pattern or regular expression in the - Series/Index. Equivalent to :func:`re.findall`. + Find all occurrences of pattern or regular expression in the Series/Index. + + Equivalent to apply :func:`re.findall` to all the elements in the + Series/Index. Empty matches are included in the result. Parameters ---------- @@ -910,11 +912,60 @@ def str_findall(arr, pat, flags=0): Returns ------- - matches : Series/Index of lists + matches : Series/Index of lists of strings, with all non-overlapping + matches of pattern or regular expression in each string of this + Series/Index See Also -------- - extractall : returns DataFrame with one column per capture group + Series.str.extractall : For each subject string in the Series, extract \ + groups from all matches of regular expression pat + Series.str.count : Count occurrences of pattern in each string of the \ + Series/Index + re.findall: Return all non-overlapping matches of pattern in string, \ + as a list of strings + + Examples + -------- + + >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + >>> s.str.findall('Monkey') + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + >>> s.str.findall('MONKEY') + 0 [] + 1 [] + 2 [] + dtype: object + + >>> s.str.findall('MONKEY', flags=re.IGNORECASE) + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + >>> s.str.findall('on') + 0 [on] + 1 [on] + 2 [] + dtype: object + + >>> s.str.findall('on$') + 0 
[on] + 1 [] + 2 [] + dtype: object + + >>> s.str.findall('b') + 0 [] + 1 [] + 2 [b, b] + dtype: object + + """ regex = re.compile(pat, flags=flags) return _na_map(regex.findall, arr) From a22740469e2fc909c1579fb8d18ec98d1bd55d90 Mon Sep 17 00:00:00 2001 From: jcontesti <25779507+jcontesti@users.noreply.github.com> Date: Fri, 2 Mar 2018 21:03:00 +0100 Subject: [PATCH 2/8] DOC: Improved the docstring of Series.str.findall --- pandas/core/strings.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index fe083b31be03b..98d015fc89aaf 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -901,29 +901,31 @@ def str_findall(arr, pat, flags=0): Find all occurrences of pattern or regular expression in the Series/Index. Equivalent to apply :func:`re.findall` to all the elements in the - Series/Index. Empty matches are included in the result. + Series/Index. Parameters ---------- - pat : string - Pattern or regular expression - flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE + pat : str + Pattern or regular expression. + flags : int + re module flags, e.g. re.IGNORECASE (default is 0, which means + no flags). + kwargs + These parameters will be ignored. Returns ------- - matches : Series/Index of lists of strings, with all non-overlapping - matches of pattern or regular expression in each string of this - Series/Index + Series/Index of lists of strings + All non-overlapping matches of pattern or regular expression in each + string of this Series/Index. See Also -------- - Series.str.extractall : For each subject string in the Series, extract \ - groups from all matches of regular expression pat - Series.str.count : Count occurrences of pattern in each string of the \ - Series/Index + extractall : For each subject string in the Series, extract groups \ + from all matches of regular expression pat. 
+ count : Count occurrences of pattern in each string of the Series/Index. re.findall: Return all non-overlapping matches of pattern in string, \ - as a list of strings + as a list of strings. Examples -------- @@ -941,6 +943,7 @@ def str_findall(arr, pat, flags=0): 2 [] dtype: object + >>> import re >>> s.str.findall('MONKEY', flags=re.IGNORECASE) 0 [] 1 [Monkey] @@ -965,7 +968,6 @@ def str_findall(arr, pat, flags=0): 2 [b, b] dtype: object - """ regex = re.compile(pat, flags=flags) return _na_map(regex.findall, arr) From ce333e428c98eb80d5fda1f7950b833f6bf02d0f Mon Sep 17 00:00:00 2001 From: jcontesti <25779507+jcontesti@users.noreply.github.com> Date: Wed, 7 Mar 2018 00:03:01 +0100 Subject: [PATCH 3/8] DOC: fixing PR Series.str.findall: str is now string, text corrections, kwargs in parameters list removed, comments to all the examples added --- pandas/core/strings.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 98d015fc89aaf..b5e7fc36e6066 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -900,18 +900,16 @@ def str_findall(arr, pat, flags=0): """ Find all occurrences of pattern or regular expression in the Series/Index. - Equivalent to apply :func:`re.findall` to all the elements in the + Equivalent to applying :func:`re.findall` to all the elements in the Series/Index. Parameters ---------- - pat : str + pat : string Pattern or regular expression. flags : int re module flags, e.g. re.IGNORECASE (default is 0, which means no flags). - kwargs - These parameters will be ignored. Returns ------- @@ -921,28 +919,37 @@ def str_findall(arr, pat, flags=0): See Also -------- - extractall : For each subject string in the Series, extract groups \ - from all matches of regular expression pat. + extractall : For each subject string in the Series, extract groups + from all matches of regular expression pattern. 
count : Count occurrences of pattern in each string of the Series/Index. - re.findall: Return all non-overlapping matches of pattern in string, \ - as a list of strings. + re.findall: Return all non-overlapping matches of pattern in string, + as a list of strings. Examples -------- >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + + The search for the pattern `Monkey` returns one match: + >>> s.str.findall('Monkey') 0 [] 1 [Monkey] 2 [] dtype: object + On the other hand, the search for the pattern 'MONKEY' doesn't return any + match: + >>> s.str.findall('MONKEY') 0 [] 1 [] 2 [] dtype: object + Flags can be added to the regular expression. For instance, to find the + pattern `MONKEY` ignoring the case: + >>> import re >>> s.str.findall('MONKEY', flags=re.IGNORECASE) 0 [] @@ -950,18 +957,27 @@ def str_findall(arr, pat, flags=0): 2 [] dtype: object + When the pattern matches more than one string in the Series, all matches + are returned: + >>> s.str.findall('on') 0 [on] 1 [on] 2 [] dtype: object + Regular expressions are supported too. 
For instance, the search for all the + strings ending with the word `on` is shown next: + >>> s.str.findall('on$') 0 [on] 1 [] 2 [] dtype: object + If the pattern is found more than once in the same string, then a list of + strings is returned: + >>> s.str.findall('b') 0 [] 1 [] From 19854e19718848ecc436173d1d22f26dbb28b321 Mon Sep 17 00:00:00 2001 From: jcontesti <25779507+jcontesti@users.noreply.github.com> Date: Thu, 8 Mar 2018 00:13:35 +0100 Subject: [PATCH 4/8] Default 0 moved after int, improved explanation of extractall, pattern vs regular expression, single backtick quotes when needed added --- pandas/core/strings.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b5e7fc36e6066..2295c7144a269 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -907,9 +907,8 @@ def str_findall(arr, pat, flags=0): ---------- pat : string Pattern or regular expression. - flags : int - re module flags, e.g. re.IGNORECASE (default is 0, which means - no flags). + flags : int (default 0) + `re` module flags, e.g. `re.IGNORECASE`. Returns ------- @@ -919,11 +918,13 @@ def str_findall(arr, pat, flags=0): See Also -------- - extractall : For each subject string in the Series, extract groups - from all matches of regular expression pattern. - count : Count occurrences of pattern in each string of the Series/Index. - re.findall: Return all non-overlapping matches of pattern in string, - as a list of strings. + count : Count occurrences of pattern or regular expression in each string + of the Series/Index. + extractall : For each string in the Series, extract groups from all matches + of regular expression and return a DataFrame with one row for each + match and one column for each group. + re.findall: Return all non-overlapping matches of pattern or regular + expression in string, as a list of strings. 
Examples -------- @@ -938,7 +939,7 @@ def str_findall(arr, pat, flags=0): 2 [] dtype: object - On the other hand, the search for the pattern 'MONKEY' doesn't return any + On the other hand, the search for the pattern `MONKEY` doesn't return any match: >>> s.str.findall('MONKEY') @@ -947,8 +948,8 @@ def str_findall(arr, pat, flags=0): 2 [] dtype: object - Flags can be added to the regular expression. For instance, to find the - pattern `MONKEY` ignoring the case: + Flags can be added to the pattern or regular expression. For instance, + to find the pattern `MONKEY` ignoring the case: >>> import re >>> s.str.findall('MONKEY', flags=re.IGNORECASE) From c688b502b8cb560632dc316e43fa3a9f5b5125fd Mon Sep 17 00:00:00 2001 From: jcontesti <25779507+jcontesti@users.noreply.github.com> Date: Thu, 8 Mar 2018 00:17:34 +0100 Subject: [PATCH 5/8] Format of default value changed --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 2295c7144a269..1c1c8296817f1 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -907,7 +907,7 @@ def str_findall(arr, pat, flags=0): ---------- pat : string Pattern or regular expression. - flags : int (default 0) + flags : int, default 0 `re` module flags, e.g. `re.IGNORECASE`. Returns From 62c6a5a71b73a7f29af7d0c761ac9a333964427d Mon Sep 17 00:00:00 2001 From: jcontesti <25779507+jcontesti@users.noreply.github.com> Date: Thu, 8 Mar 2018 00:21:33 +0100 Subject: [PATCH 6/8] re is now in double backtick quotes --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 1c1c8296817f1..670b459d0cafb 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -908,7 +908,7 @@ def str_findall(arr, pat, flags=0): pat : string Pattern or regular expression. flags : int, default 0 - `re` module flags, e.g. `re.IGNORECASE`. + ``re`` module flags, e.g. 
`re.IGNORECASE`. Returns ------- From 31b7919f944b01bd9a23991b695eacc0d0a2936c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 10 Mar 2018 13:19:41 +0100 Subject: [PATCH 7/8] small edits --- pandas/core/strings.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 670b459d0cafb..b8e4c96d8f5e6 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -908,7 +908,8 @@ def str_findall(arr, pat, flags=0): pat : string Pattern or regular expression. flags : int, default 0 - ``re`` module flags, e.g. `re.IGNORECASE`. + ``re`` module flags, e.g. `re.IGNORECASE` (default is 0, which means + no flags). Returns ------- @@ -931,7 +932,7 @@ def str_findall(arr, pat, flags=0): >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) - The search for the pattern `Monkey` returns one match: + The search for the pattern 'Monkey' returns one match: >>> s.str.findall('Monkey') 0 [] @@ -939,7 +940,7 @@ def str_findall(arr, pat, flags=0): 2 [] dtype: object - On the other hand, the search for the pattern `MONKEY` doesn't return any + On the other hand, the search for the pattern 'MONKEY' doesn't return any match: >>> s.str.findall('MONKEY') @@ -949,7 +950,7 @@ def str_findall(arr, pat, flags=0): dtype: object Flags can be added to the pattern or regular expression. For instance, - to find the pattern `MONKEY` ignoring the case: + to find the pattern 'MONKEY' ignoring the case: >>> import re >>> s.str.findall('MONKEY', flags=re.IGNORECASE) @@ -968,7 +969,7 @@ def str_findall(arr, pat, flags=0): dtype: object Regular expressions are supported too. 
For instance, the search for all the - strings ending with the word `on` is shown next: + strings ending with the word 'on' is shown next: >>> s.str.findall('on$') 0 [on] @@ -977,7 +978,7 @@ def str_findall(arr, pat, flags=0): dtype: object If the pattern is found more than once in the same string, then a list of - strings is returned: + multiple strings is returned: >>> s.str.findall('b') 0 [] From cd7223bd5b7ab50cff0ba018f104cbf0c2db6b91 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 10 Mar 2018 13:23:05 +0100 Subject: [PATCH 8/8] final edit --- pandas/core/strings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b8e4c96d8f5e6..fac607f4621a8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -924,8 +924,8 @@ def str_findall(arr, pat, flags=0): extractall : For each string in the Series, extract groups from all matches of regular expression and return a DataFrame with one row for each match and one column for each group. - re.findall: Return all non-overlapping matches of pattern or regular - expression in string, as a list of strings. + re.findall : The equivalent ``re`` function to find all non-overlapping matches + of pattern or regular expression in string, as a list of strings. Examples --------