Skip to content

STY: some files in pandas/_libs/ #30156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ def group_cumprod_float64(float64_t[:, :] out,
-----
This method modifies the `out` parameter, rather than returning an object.
"""

cdef:
Py_ssize_t i, j, N, K, size
float64_t val
Expand Down Expand Up @@ -233,7 +232,6 @@ def group_cumsum(numeric[:, :] out,
-----
This method modifies the `out` parameter, rather than returning an object.
"""

cdef:
Py_ssize_t i, j, N, K, size
numeric val
Expand Down Expand Up @@ -1404,7 +1402,6 @@ def group_cummin(groupby_t[:, :] out,
-----
This method modifies the `out` parameter, rather than returning an object.
"""

cdef:
Py_ssize_t i, j, N, K, size
groupby_t val, mval
Expand Down Expand Up @@ -1465,7 +1462,6 @@ def group_cummax(groupby_t[:, :] out,
-----
This method modifies the `out` parameter, rather than returning an object.
"""

cdef:
Py_ssize_t i, j, N, K, size
groupby_t val, mval
Expand Down
17 changes: 10 additions & 7 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):

Returns
-------
1-d uint64 ndarray of hashes
1-d uint64 ndarray of hashes.

Raises
------
TypeError
If the array contains mixed types.

Notes
-----
allowed values must be strings, or nulls
mixed array types will raise TypeError

Allowed values must be strings, or nulls
mixed array types will raise TypeError.
"""
cdef:
Py_ssize_t i, l, n
Expand All @@ -47,7 +51,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
if len(k) != 16:
raise ValueError(f"key should be a 16-byte string encoded, "
raise ValueError("key should be a 16-byte string encoded, "
f"got {k} (len {len(k)})")

n = len(arr)
Expand All @@ -68,8 +72,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):

else:
raise TypeError(f"{val} of type {type(val)} is not a valid type "
f"for hashing, must be string or null"
)
"for hashing, must be string or null")

l = len(data)
lens[i] = l
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ cdef class Int64Factorizer:
@cython.boundscheck(False)
def unique_label_indices(const int64_t[:] labels):
"""
indices of the first occurrences of the unique labels
Indices of the first occurrences of the unique labels
*excluding* -1. equivalent to:
np.unique(labels, return_index=True)[1]
"""
Expand Down
43 changes: 23 additions & 20 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None):


# Don't populate hash tables in monotonic indexes larger than this
_SIZE_CUTOFF = 1000000
_SIZE_CUTOFF = 1_000_000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be OK with this in other places as well, if you have a way to easily identify them; it is more readable for sure.



cdef class IndexEngine:
Expand All @@ -79,6 +79,8 @@ cdef class IndexEngine:

cpdef get_value(self, ndarray arr, object key, object tz=None):
"""
Parameters
----------
arr : 1-dimensional ndarray
"""
cdef:
Expand All @@ -93,6 +95,8 @@ cdef class IndexEngine:

cpdef set_value(self, ndarray arr, object key, object value):
"""
Parameters
----------
arr : 1-dimensional ndarray
"""
cdef:
Expand Down Expand Up @@ -283,11 +287,12 @@ cdef class IndexEngine:
return self.mapping.lookup(values)

def get_indexer_non_unique(self, targets):
""" return an indexer suitable for takng from a non unique index
return the labels in the same order ast the target
and a missing indexer into the targets (which correspond
to the -1 indices in the results """

"""
Return an indexer suitable for takng from a non unique index
return the labels in the same order ast the target
and a missing indexer into the targets (which correspond
to the -1 indices in the results
"""
cdef:
ndarray values, x
ndarray[int64_t] result, missing
Expand All @@ -302,8 +307,8 @@ cdef class IndexEngine:
stargets = set(targets)
n = len(values)
n_t = len(targets)
if n > 10000:
n_alloc = 10000
if n > 10_000:
n_alloc = 10_000
else:
n_alloc = n

Expand Down Expand Up @@ -345,7 +350,7 @@ cdef class IndexEngine:

# realloc if needed
if count >= n_alloc:
n_alloc += 10000
n_alloc += 10_000
result = np.resize(result, n_alloc)

result[count] = j
Expand All @@ -355,7 +360,7 @@ cdef class IndexEngine:
else:

if count >= n_alloc:
n_alloc += 10000
n_alloc += 10_000
result = np.resize(result, n_alloc)
result[count] = -1
count += 1
Expand Down Expand Up @@ -393,7 +398,7 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:

cdef class ObjectEngine(IndexEngine):
"""
Index Engine for use with object-dtype Index, namely the base class Index
Index Engine for use with object-dtype Index, namely the base class Index.
"""
cdef _make_hash_table(self, Py_ssize_t n):
return _hash.PyObjectHashTable(n)
Expand Down Expand Up @@ -560,7 +565,7 @@ cpdef convert_scalar(ndarray arr, object value):
pass
elif value is None or value != value:
return np.datetime64("NaT", "ns")
raise ValueError(f"cannot set a Timestamp with a non-timestamp "
raise ValueError("cannot set a Timestamp with a non-timestamp "
f"{type(value).__name__}")

elif arr.descr.type_num == NPY_TIMEDELTA:
Expand All @@ -577,17 +582,17 @@ cpdef convert_scalar(ndarray arr, object value):
pass
elif value is None or value != value:
return np.timedelta64("NaT", "ns")
raise ValueError(f"cannot set a Timedelta with a non-timedelta "
raise ValueError("cannot set a Timedelta with a non-timedelta "
f"{type(value).__name__}")

if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and
not issubclass(arr.dtype.type, np.bool_)):
if util.is_bool_object(value):
raise ValueError('Cannot assign bool to float/integer series')
raise ValueError("Cannot assign bool to float/integer series")

if issubclass(arr.dtype.type, (np.integer, np.bool_)):
if util.is_float_object(value) and value != value:
raise ValueError('Cannot assign nan to integer series')
raise ValueError("Cannot assign nan to integer series")

return value

Expand Down Expand Up @@ -625,13 +630,12 @@ cdef class BaseMultiIndexCodesEngine:
Parameters
----------
levels : list-like of numpy arrays
Levels of the MultiIndex
Levels of the MultiIndex.
labels : list-like of numpy arrays of integer dtype
Labels of the MultiIndex
Labels of the MultiIndex.
offsets : numpy array of uint64 dtype
Pre-calculated offsets, one for each level of the index
Pre-calculated offsets, one for each level of the index.
"""

self.levels = levels
self.offsets = offsets

Expand Down Expand Up @@ -664,7 +668,6 @@ cdef class BaseMultiIndexCodesEngine:
int_keys : 1-dimensional array of dtype uint64 or object
Integers representing one combination each
"""

level_codes = [lev.get_indexer(codes) + 1 for lev, codes
in zip(self.levels, zip(*target))]
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/indexing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ cdef class _NDFrameIndexerBase:
if ndim is None:
ndim = self._ndim = self.obj.ndim
if ndim > 2:
msg = ("NDFrameIndexer does not support NDFrame objects with"
" ndim > 2")
raise ValueError(msg)
raise ValueError("NDFrameIndexer does not support "
"NDFrame objects with ndim > 2")
return ndim