POC Use fused types more, tempita less #22432

Closed
wants to merge 5 commits
Changes from all commits
218 changes: 150 additions & 68 deletions pandas/_libs/algos_common_helper.pxi.in
@@ -19,33 +19,44 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
# 1-d template
#----------------------------------------------------------------------

{{py:
ctypedef fused algos_t:
float64_t
float32_t
object
int32_t
int64_t
uint64_t
uint8_t

# name, c_type, dtype, can_hold_na, nogil
dtypes = [('float64', 'float64_t', 'np.float64', True, True),
('float32', 'float32_t', 'np.float32', True, True),
('object', 'object', 'object', True, False),
('int32', 'int32_t', 'np.int32', False, True),
('int64', 'int64_t', 'np.int64', False, True),
('uint64', 'uint64_t', 'np.uint64', False, True),
('bool', 'uint8_t', 'np.bool', False, True)]

def get_dispatch(dtypes):
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap(ndarray[algos_t] index, object func):
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)

for name, c_type, dtype, can_hold_na, nogil in dtypes:
from pandas._libs.lib import maybe_convert_objects

nogil_str = 'with nogil:' if nogil else ''
tab = ' ' if nogil else ''
yield name, c_type, dtype, can_hold_na, nogil_str, tab
}}
for i in range(length):
result[i] = func(index[i])

return maybe_convert_objects(result)

{{for name, c_type, dtype, can_hold_na, nogil_str, tab
in get_dispatch(dtypes)}}

arrmap_float64 = arrmap["float64_t"]
arrmap_float32 = arrmap["float32_t"]
arrmap_object = arrmap["object"]
arrmap_int32 = arrmap["int32_t"]
arrmap_int64 = arrmap["int64_t"]
arrmap_uint64 = arrmap["uint64_t"]
arrmap_bool = arrmap["uint8_t"]


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):
cpdef map_indices(ndarray[algos_t] index):
"""
Produce a dict mapping the values of the input array to their respective
locations.
@@ -55,8 +66,9 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):

Better to do this with Cython because of the enormous speed boost.
"""
cdef Py_ssize_t i, length
cdef dict result = {}
cdef:
Py_ssize_t i, length
dict result = {}

length = len(index)

@@ -66,13 +78,22 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):
return result


map_indices_float64 = map_indices["float64_t"]
map_indices_float32 = map_indices["float32_t"]
map_indices_object = map_indices["object"]
map_indices_int32 = map_indices["int32_t"]
map_indices_int64 = map_indices["int64_t"]
map_indices_uint64 = map_indices["uint64_t"]
map_indices_bool = map_indices["uint8_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, next
algos_t cur, next
int lim, fill_count = 0

nleft = len(old)
@@ -129,20 +150,28 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):

return indexer

pad_float64 = pad["float64_t"]
pad_float32 = pad["float32_t"]
pad_object = pad["object"]
pad_int32 = pad["int32_t"]
pad_int64 = pad["int64_t"]
pad_uint64 = pad["uint64_t"]
pad_bool = pad["uint8_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
def pad_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0
cdef Py_ssize_t i, N
cdef algos_t val
cdef int lim, fill_count = 0

N = len(values)

# GH 2778
# GH#2778
if N == 0:
return

@@ -167,19 +196,28 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
val = values[i]


pad_inplace_float64 = pad_inplace["float64_t"]
pad_inplace_float32 = pad_inplace["float32_t"]
pad_inplace_object = pad_inplace["object"]
pad_inplace_int32 = pad_inplace["int32_t"]
pad_inplace_int64 = pad_inplace["int64_t"]
pad_inplace_uint64 = pad_inplace["uint64_t"]
pad_inplace_bool = pad_inplace["uint8_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
algos_t val
int lim, fill_count = 0

K, N = (<object> values).shape

# GH 2778
# GH#2778
if N == 0:
return

@@ -205,6 +243,16 @@ def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
fill_count = 0
val = values[j, i]


pad_2d_inplace_float64 = pad_2d_inplace["float64_t"]
pad_2d_inplace_float32 = pad_2d_inplace["float32_t"]
pad_2d_inplace_object = pad_2d_inplace["object"]
pad_2d_inplace_int32 = pad_2d_inplace["int32_t"]
pad_2d_inplace_int64 = pad_2d_inplace["int64_t"]
pad_2d_inplace_uint64 = pad_2d_inplace["uint64_t"]
pad_2d_inplace_bool = pad_2d_inplace["uint8_t"]


"""
Backfilling logic for generating fill vector

@@ -233,13 +281,12 @@ D

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, prev
int lim, fill_count = 0
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef algos_t cur, prev
cdef int lim, fill_count = 0

nleft = len(old)
nright = len(new)
@@ -297,19 +344,28 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
return indexer


backfill_float64 = backfill["float64_t"]
backfill_float32 = backfill["float32_t"]
backfill_object = backfill["object"]
backfill_int32 = backfill["int32_t"]
backfill_int64 = backfill["int64_t"]
backfill_uint64 = backfill["uint64_t"]
backfill_bool = backfill["uint8_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
def backfill_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0
cdef Py_ssize_t i, N
cdef algos_t val
cdef int lim, fill_count = 0

N = len(values)

# GH 2778
# GH#2778
if N == 0:
return

@@ -334,19 +390,28 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
val = values[i]


backfill_inplace_float64 = backfill_inplace["float64_t"]
backfill_inplace_float32 = backfill_inplace["float32_t"]
backfill_inplace_object = backfill_inplace["object"]
backfill_inplace_int32 = backfill_inplace["int32_t"]
backfill_inplace_int64 = backfill_inplace["int64_t"]
backfill_inplace_uint64 = backfill_inplace["uint64_t"]
backfill_inplace_bool = backfill_inplace["uint8_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
algos_t val
int lim, fill_count = 0

K, N = (<object> values).shape

# GH 2778
# GH#2778
if N == 0:
return

@@ -373,6 +438,39 @@ def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
val = values[j, i]


backfill_2d_inplace_float64 = backfill_2d_inplace["float64_t"]
backfill_2d_inplace_float32 = backfill_2d_inplace["float32_t"]
backfill_2d_inplace_object = backfill_2d_inplace["object"]
backfill_2d_inplace_int32 = backfill_2d_inplace["int32_t"]
backfill_2d_inplace_int64 = backfill_2d_inplace["int64_t"]
backfill_2d_inplace_uint64 = backfill_2d_inplace["uint64_t"]
backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"]


{{py:

# name, c_type, dtype, can_hold_na, nogil
dtypes = [('float64', 'float64_t', 'np.float64', True, True),
('float32', 'float32_t', 'np.float32', True, True),
('object', 'object', 'object', True, False),
('int32', 'int32_t', 'np.int32', False, True),
('int64', 'int64_t', 'np.int64', False, True),
('uint64', 'uint64_t', 'np.uint64', False, True),
('bool', 'uint8_t', 'np.bool', False, True)]

def get_dispatch(dtypes):

for name, c_type, dtype, can_hold_na, nogil in dtypes:

nogil_str = 'with nogil:' if nogil else ''
tab = ' ' if nogil else ''
yield name, c_type, dtype, can_hold_na, nogil_str, tab
}}

{{for name, c_type, dtype, can_hold_na, nogil_str, tab
in get_dispatch(dtypes)}}


@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
@@ -429,22 +527,6 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
return is_monotonic_inc, is_monotonic_dec, \
is_unique and (is_monotonic_inc or is_monotonic_dec)


@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)

from pandas._libs.lib import maybe_convert_objects

for i in range(length):
result[i] = func(index[i])

return maybe_convert_objects(result)

{{endfor}}

#----------------------------------------------------------------------
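The conversions in this file all follow one pattern: a single fused-type implementation replaces the tempita loop, and the old dtype-suffixed names are recreated by indexing the fused function with the name of the member type. A minimal standalone sketch of that pattern, using hypothetical names rather than code from this PR:

# demo_fused.pyx -- sketch only, hypothetical module and function names
cimport cython
cimport numpy as cnp
from numpy cimport ndarray, float64_t, int64_t

cnp.import_array()


ctypedef fused demo_t:
    float64_t
    int64_t
    object


@cython.wraparound(False)
@cython.boundscheck(False)
def first_valid(ndarray[demo_t] arr):
    # Cython compiles one specialization per member of demo_t and
    # dispatches on the argument's dtype when called from Python.
    cdef Py_ssize_t i
    for i in range(arr.shape[0]):
        if arr[i] == arr[i]:   # NaN-aware only in the float specialization
            return arr[i]
    return None


# dtype-suffixed aliases, mirroring the names tempita used to generate
first_valid_float64 = first_valid["float64_t"]
first_valid_int64 = first_valid["int64_t"]
first_valid_object = first_valid["object"]

The aliases keep the public per-dtype names importable while the duplicated template expansions go away.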
31 changes: 18 additions & 13 deletions pandas/_libs/algos_take_helper.pxi.in
@@ -264,29 +264,34 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
# take_2d internal function
#----------------------------------------------------------------------

{{py:

# dtype, ctype, init_result
dtypes = [('float64', 'float64_t', 'np.empty_like(values)'),
('uint64', 'uint64_t', 'np.empty_like(values)'),
('object', 'object', 'values.copy()'),
('int64', 'int64_t', 'np.empty_like(values)')]
}}
ctypedef fused take_t:
float64_t
uint64_t
object
int64_t

{{for dtype, ctype, init_result in dtypes}}

cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, object idx):
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
cdef:
Py_ssize_t i, j, N, K
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
ndarray[{{ctype}}, ndim=2] result
ndarray[take_t, ndim=2] result
object val

N, K = (<object> values).shape
result = {{init_result}}
if take_t is object:
result = values.copy()
else:
result = np.empty_like(values)

for i in range(N):
for j in range(K):
result[i, j] = values[i, indexer[i, j]]
return result

{{endfor}}

# TODO: Are these treated as cdefs?
_take_2d_float64 = _take_2d[float64_t]
_take_2d_uint64 = _take_2d[uint64_t]
_take_2d_object = _take_2d[object]
_take_2d_int64 = _take_2d[int64_t]
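The `if take_t is object:` block above is a compile-time branch: each specialization keeps only its own arm, so the object build copies the input while the numeric builds allocate with np.empty_like, replacing the per-dtype init_result strings from the template. A small self-contained sketch of that idiom, with hypothetical names:

# sketch only -- hypothetical names, not code from this PR
cimport numpy as cnp
from numpy cimport ndarray, float64_t
import numpy as np

cnp.import_array()


ctypedef fused buf_t:
    float64_t
    object


def make_result(ndarray[buf_t, ndim=2] values):
    # Resolved per specialization at compile time: the object arm is
    # absent from the float64_t build, and vice versa.
    if buf_t is object:
        return values.copy()
    else:
        return np.empty_like(values)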
190 changes: 112 additions & 78 deletions pandas/_libs/groupby_helper.pxi.in
@@ -593,37 +593,26 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
# group_min, group_max
#----------------------------------------------------------------------

{{py:

# name, c_type, dest_type2, nan_val
dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'),
('float32', 'float32_t', 'NAN', 'np.inf'),
('int64', 'int64_t', 'iNaT', '_int64_max')]

def get_dispatch(dtypes):

for name, dest_type2, nan_val, inf_val in dtypes:
yield name, dest_type2, nan_val, inf_val
}}


{{for name, dest_type2, nan_val, inf_val in get_dispatch(dtypes)}}
ctypedef fused group_t:
float64_t
float32_t
int64_t


@cython.wraparound(False)
@cython.boundscheck(False)
def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{dest_type2}}, ndim=2] values,
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
def group_max(ndarray[group_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[group_t, ndim=2] values,
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
{{dest_type2}} val, count
ndarray[{{dest_type2}}, ndim=2] maxx, nobs
group_t val, count
ndarray[group_t, ndim=2] maxx, nobs

assert min_count == -1, "'min_count' only used in add and prod"

@@ -633,7 +622,12 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
nobs = np.zeros_like(out)

maxx = np.empty_like(out)
maxx.fill(-{{inf_val}})

if group_t is int64_t:
# evaluated at compile-time
maxx.fill(-_int64_max)
else:
maxx.fill(-np.inf)

N, K = (<object> values).shape

@@ -648,37 +642,43 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
if ((group_t is int64_t and val != iNaT) or
(group_t is not int64_t and
val == val and val != NAN)):
nobs[lab, j] += 1
if val > maxx[lab, j]:
maxx[lab, j] = val

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
out[i, j] = {{nan_val}}
if group_t is int64_t:
out[i, j] = iNaT
else:
out[i, j] = NAN
else:
out[i, j] = maxx[i, j]


group_max_float64 = group_max["float64_t"]
group_max_float32 = group_max["float32_t"]
group_max_int64 = group_max["int64_t"]


@cython.wraparound(False)
@cython.boundscheck(False)
def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{dest_type2}}, ndim=2] values,
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
def group_min(ndarray[group_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[group_t, ndim=2] values,
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
{{dest_type2}} val, count
ndarray[{{dest_type2}}, ndim=2] minx, nobs
group_t val, count
ndarray[group_t, ndim=2] minx, nobs

assert min_count == -1, "'min_count' only used in add and prod"

@@ -688,7 +688,12 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
nobs = np.zeros_like(out)

minx = np.empty_like(out)
minx.fill({{inf_val}})

if group_t is int64_t:
# evaluated at compile-time
minx.fill(_int64_max)
else:
minx.fill(np.inf)

N, K = (<object> values).shape

@@ -703,41 +708,52 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
if ((group_t is int64_t and val != iNaT) or
(group_t is not int64_t and
val == val and val != NAN)):
nobs[lab, j] += 1
if val < minx[lab, j]:
minx[lab, j] = val

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
out[i, j] = {{nan_val}}
if group_t is int64_t:
out[i, j] = iNaT
else:
out[i, j] = NAN
else:
out[i, j] = minx[i, j]


group_min_float64 = group_min["float64_t"]
group_min_float32 = group_min["float32_t"]
group_min_int64 = group_min["int64_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
ndarray[{{dest_type2}}, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
def group_cummin(ndarray[group_t, ndim=2] out,
ndarray[group_t, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, size
{{dest_type2}} val, mval
ndarray[{{dest_type2}}, ndim=2] accum
group_t val, mval
ndarray[group_t, ndim=2] accum
int64_t lab

N, K = (<object> values).shape
accum = np.empty_like(values)
accum.fill({{inf_val}})

if group_t is int64_t:
# evaluated at compile-time
accum.fill(_int64_max)
else:
accum.fill(np.inf)

with nogil:
for i in range(N):
@@ -749,37 +765,50 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# val = nan
{{if name == 'int64'}}
if is_datetimelike and val == {{nan_val}}:
out[i, j] = {{nan_val}}
if group_t is int64_t:
# evaluated at compile-time
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
continue

else:
{{else}}
if val == val:
{{endif}}
mval = accum[lab, j]
if val < mval:
accum[lab, j] = mval = val
out[i, j] = mval
if val != val:
continue

mval = accum[lab, j]
if val < mval:
accum[lab, j] = mval = val
out[i, j] = mval


group_cummin_float64 = group_cummin["float64_t"]
group_cummin_float32 = group_cummin["float32_t"]
group_cummin_int64 = group_cummin["int64_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
ndarray[{{dest_type2}}, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
def group_cummax(ndarray[group_t, ndim=2] out,
ndarray[group_t, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, size
{{dest_type2}} val, mval
ndarray[{{dest_type2}}, ndim=2] accum
group_t val, mval
ndarray[group_t, ndim=2] accum
int64_t lab

N, K = (<object> values).shape
accum = np.empty_like(values)
accum.fill(-{{inf_val}})

if group_t is int64_t:
# evaluated at compile-time
accum.fill(-_int64_max)
else:
accum.fill(-np.inf)

with nogil:
for i in range(N):
@@ -790,16 +819,21 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
for j in range(K):
val = values[i, j]

{{if name == 'int64'}}
if is_datetimelike and val == {{nan_val}}:
out[i, j] = {{nan_val}}
if group_t is int64_t:
# evaluated at compile-time
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
continue
else:
{{else}}
if val == val:
{{endif}}
mval = accum[lab, j]
if val > mval:
accum[lab, j] = mval = val
out[i, j] = mval
if val != val:
continue

{{endfor}}
mval = accum[lab, j]
if val > mval:
accum[lab, j] = mval = val
out[i, j] = mval


group_cummax_float64 = group_cummax["float64_t"]
group_cummax_float32 = group_cummax["float32_t"]
group_cummax_int64 = group_cummax["int64_t"]
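The group_min/group_max/group_cummin/group_cummax rewrites all rely on the same trick: the missing-value sentinel and the fill value differ between the float and int64 specializations, and `if group_t is int64_t:` lets the compiler choose the right constant per specialization where tempita used to substitute it per template expansion. A hedged sketch of the sentinel test in isolation; SENTINEL here is a stand-in for the iNaT constant the real module cimports:

# sketch only -- hypothetical names, not code from this PR
from numpy cimport float32_t, float64_t, int64_t

cdef int64_t SENTINEL = -9223372036854775807 - 1  # stand-in for iNaT


ctypedef fused agg_t:
    float64_t
    float32_t
    int64_t


cdef inline bint is_na(agg_t val) nogil:
    # Only one branch survives in each generated specialization:
    # int64 compares against the sentinel, floats use the NaN self-test.
    if agg_t is int64_t:
        return val == SENTINEL
    else:
        return val != val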
61 changes: 40 additions & 21 deletions pandas/_libs/join_helper.pxi.in
@@ -8,24 +8,13 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
# left_join_indexer, inner_join_indexer, outer_join_indexer
#----------------------------------------------------------------------

{{py:

# name, c_type, dtype
dtypes = [('float64', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'np.float32'),
('object', 'object', 'object'),
('int32', 'int32_t', 'np.int32'),
('int64', 'int64_t', 'np.int64'),
('uint64', 'uint64_t', 'np.uint64')]

def get_dispatch(dtypes):

for name, c_type, dtype in dtypes:
yield name, c_type, dtype

}}

{{for name, c_type, dtype in get_dispatch(dtypes)}}
ctypedef fused join_t:
float64_t
float32_t
object
int32_t
int64_t
uint64_t

# Joins on ordered, unique indices

@@ -34,12 +23,11 @@ def get_dispatch(dtypes):

@cython.wraparound(False)
@cython.boundscheck(False)
def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
ndarray[{{c_type}}] right):
def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t] indexer
{{c_type}} lval, rval
join_t lval, rval

i = 0
j = 0
@@ -78,6 +66,37 @@ def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
return indexer


left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"]
left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"]
left_join_indexer_unique_object = left_join_indexer_unique["object"]
left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"]
left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"]
left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"]

{{py:

# name, c_type, dtype
dtypes = [('float64', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'np.float32'),
('object', 'object', 'object'),
('int32', 'int32_t', 'np.int32'),
('int64', 'int64_t', 'np.int64'),
('uint64', 'uint64_t', 'np.uint64')]

def get_dispatch(dtypes):

for name, c_type, dtype in dtypes:
yield name, c_type, dtype

}}

{{for name, c_type, dtype in get_dispatch(dtypes)}}

# Joins on ordered, unique indices

# right might contain non-unique values


# @cython.wraparound(False)
# @cython.boundscheck(False)
def left_join_indexer_{{name}}(ndarray[{{c_type}}] left,
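Only left_join_indexer_unique is converted in this file; the remaining join helpers keep their tempita block for now, so the migration can land incrementally. For converted functions, callers can rely on dtype dispatch instead of picking a suffixed name, while the suffixed aliases keep existing call sites working. A hedged usage sketch, assuming a build of this POC branch of pandas._libs.join:

import numpy as np
from pandas._libs import join  # assumes this POC branch is built

left = np.array([1, 3, 5], dtype=np.int64)
right = np.array([3, 5, 7], dtype=np.int64)

# the fused def function selects the int64_t specialization from the dtype
indexer = join.left_join_indexer_unique(left, right)

# the old suffixed alias is still defined for unconverted call sites
indexer_alias = join.left_join_indexer_unique_int64(left, right)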
85 changes: 50 additions & 35 deletions pandas/_libs/reshape_helper.pxi.in
@@ -8,34 +8,28 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
# reshape
# ----------------------------------------------------------------------

{{py:

# name, c_type
dtypes = [('uint8', 'uint8_t'),
('uint16', 'uint16_t'),
('uint32', 'uint32_t'),
('uint64', 'uint64_t'),
('int8', 'int8_t'),
('int16', 'int16_t'),
('int32', 'int32_t'),
('int64', 'int64_t'),
('float32', 'float32_t'),
('float64', 'float64_t'),
('object', 'object')]
}}

{{for dtype, c_type in dtypes}}

ctypedef fused reshape_t:
uint8_t
uint16_t
uint32_t
uint64_t
int8_t
int16_t
int32_t
int64_t
float32_t
float64_t
object

@cython.wraparound(False)
@cython.boundscheck(False)
def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=1] mask,
Py_ssize_t stride,
Py_ssize_t length,
Py_ssize_t width,
ndarray[{{c_type}}, ndim=2] new_values,
ndarray[uint8_t, ndim=2] new_mask):
def unstack(ndarray[reshape_t, ndim=2] values,
ndarray[uint8_t, ndim=1] mask,
Py_ssize_t stride,
Py_ssize_t length,
Py_ssize_t width,
ndarray[reshape_t, ndim=2] new_values,
ndarray[uint8_t, ndim=2] new_mask):
"""
transform long sorted_values to wide new_values

@@ -50,23 +44,33 @@ def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values,
result array
new_mask : boolean ndarray
result mask

"""

cdef:
Py_ssize_t i, j, w, nulls, s, offset

{{if dtype == 'object'}}
if True:
{{else}}
with nogil:
{{endif}}
if reshape_t is not object:
with nogil:
for i in range(stride):
nulls = 0

for i in range(stride):
for j in range(length):
for w in range(width):

offset = j * width + w

if mask[offset]:
s = i * width + w
new_values[j, s] = values[offset - nulls, i]
new_mask[j, s] = 1
else:
nulls += 1

else:
# identical to above version, but "with nogil" is not available
for i in range(stride):
nulls = 0
for j in range(length):

for j in range(length):
for w in range(width):

offset = j * width + w
@@ -78,4 +82,15 @@ def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values,
else:
nulls += 1

{{endfor}}

unstack_uint8 = unstack["uint8_t"]
unstack_uint16 = unstack["uint16_t"]
unstack_uint32 = unstack["uint32_t"]
unstack_uint64 = unstack["uint64_t"]
unstack_int8 = unstack["int8_t"]
unstack_int16 = unstack["int16_t"]
unstack_int32 = unstack["int32_t"]
unstack_int64 = unstack["int64_t"]
unstack_float32 = unstack["float32_t"]
unstack_float64 = unstack["float64_t"]
unstack_object = unstack["object"]
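Because `object` is a member of reshape_t, the shared body of unstack cannot be unconditionally nogil; the diff branches on `reshape_t is not object` and duplicates the loop, so only the object specialization keeps the GIL. A minimal sketch of that compile-time GIL split, with hypothetical names:

# sketch only -- hypothetical names, not code from this PR
cimport cython
cimport numpy as cnp
from numpy cimport ndarray, float64_t

cnp.import_array()


ctypedef fused elem_t:
    float64_t
    object


@cython.wraparound(False)
@cython.boundscheck(False)
def fill(ndarray[elem_t] out, elem_t value):
    cdef Py_ssize_t i, n = out.shape[0]
    if elem_t is not object:
        # numeric specializations release the GIL around the hot loop
        with nogil:
            for i in range(n):
                out[i] = value
    else:
        # object specialization runs the same loop while holding the GIL
        for i in range(n):
            out[i] = value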