@@ -1019,10 +1019,14 @@ def _maybe_add_join_keys(
1019
1019
take_left , take_right = None , None
1020
1020
1021
1021
if name in result :
1022
- if left_indexer is not None and right_indexer is not None :
1022
+ if left_indexer is not None or right_indexer is not None :
1023
1023
if name in self .left :
1024
1024
if left_has_missing is None :
1025
- left_has_missing = (left_indexer == - 1 ).any ()
1025
+ left_has_missing = (
1026
+ False
1027
+ if left_indexer is None
1028
+ else (left_indexer == - 1 ).any ()
1029
+ )
1026
1030
1027
1031
if left_has_missing :
1028
1032
take_right = self .right_join_keys [i ]
@@ -1032,21 +1036,27 @@ def _maybe_add_join_keys(
1032
1036
1033
1037
elif name in self .right :
1034
1038
if right_has_missing is None :
1035
- right_has_missing = (right_indexer == - 1 ).any ()
1039
+ right_has_missing = (
1040
+ False
1041
+ if right_indexer is None
1042
+ else (right_indexer == - 1 ).any ()
1043
+ )
1036
1044
1037
1045
if right_has_missing :
1038
1046
take_left = self .left_join_keys [i ]
1039
1047
1040
1048
if result [name ].dtype != self .right [name ].dtype :
1041
1049
take_right = self .right [name ]._values
1042
1050
1043
- elif left_indexer is not None :
1051
+ else :
1044
1052
take_left = self .left_join_keys [i ]
1045
1053
take_right = self .right_join_keys [i ]
1046
1054
1047
1055
if take_left is not None or take_right is not None :
1048
1056
if take_left is None :
1049
1057
lvals = result [name ]._values
1058
+ elif left_indexer is None :
1059
+ lvals = take_left
1050
1060
else :
1051
1061
# TODO: can we pin down take_left's type earlier?
1052
1062
take_left = extract_array (take_left , extract_numpy = True )
@@ -1055,6 +1065,8 @@ def _maybe_add_join_keys(
1055
1065
1056
1066
if take_right is None :
1057
1067
rvals = result [name ]._values
1068
+ elif right_indexer is None :
1069
+ rvals = take_right
1058
1070
else :
1059
1071
# TODO: can we pin down take_right's type earlier?
1060
1072
taker = extract_array (take_right , extract_numpy = True )
@@ -1063,16 +1075,17 @@ def _maybe_add_join_keys(
1063
1075
1064
1076
# if we have an all missing left_indexer
1065
1077
# make sure to just use the right values or vice-versa
1066
- mask_left = left_indexer == - 1
1067
- # error: Item "bool" of "Union[Any, bool]" has no attribute "all"
1068
- if mask_left .all (): # type: ignore[union-attr]
1078
+ if left_indexer is not None and (left_indexer == - 1 ).all ():
1069
1079
key_col = Index (rvals )
1070
1080
result_dtype = rvals .dtype
1071
1081
elif right_indexer is not None and (right_indexer == - 1 ).all ():
1072
1082
key_col = Index (lvals )
1073
1083
result_dtype = lvals .dtype
1074
1084
else :
1075
- key_col = Index (lvals ).where (~ mask_left , rvals )
1085
+ key_col = Index (lvals )
1086
+ if left_indexer is not None :
1087
+ mask_left = left_indexer == - 1
1088
+ key_col = key_col .where (~ mask_left , rvals )
1076
1089
result_dtype = find_common_type ([lvals .dtype , rvals .dtype ])
1077
1090
if (
1078
1091
lvals .dtype .kind == "M"
@@ -1103,7 +1116,9 @@ def _maybe_add_join_keys(
1103
1116
else :
1104
1117
result .insert (i , name or f"key_{ i } " , key_col )
1105
1118
1106
- def _get_join_indexers (self ) -> tuple [npt .NDArray [np .intp ], npt .NDArray [np .intp ]]:
1119
+ def _get_join_indexers (
1120
+ self ,
1121
+ ) -> tuple [npt .NDArray [np .intp ] | None , npt .NDArray [np .intp ] | None ]:
1107
1122
"""return the join indexers"""
1108
1123
# make mypy happy
1109
1124
assert self .how != "asof"
@@ -1143,6 +1158,8 @@ def _get_join_info(
1143
1158
left_indexer ,
1144
1159
how = "right" ,
1145
1160
)
1161
+ elif right_indexer is None :
1162
+ join_index = right_ax .copy ()
1146
1163
else :
1147
1164
join_index = right_ax .take (right_indexer )
1148
1165
elif self .left_index :
@@ -1162,10 +1179,13 @@ def _get_join_info(
1162
1179
right_indexer ,
1163
1180
how = "left" ,
1164
1181
)
1182
+ elif left_indexer is None :
1183
+ join_index = left_ax .copy ()
1165
1184
else :
1166
1185
join_index = left_ax .take (left_indexer )
1167
1186
else :
1168
- join_index = default_index (len (left_indexer ))
1187
+ n = len (left_ax ) if left_indexer is None else len (left_indexer )
1188
+ join_index = default_index (n )
1169
1189
1170
1190
return join_index , left_indexer , right_indexer
1171
1191
@@ -1174,17 +1194,20 @@ def _create_join_index(
1174
1194
self ,
1175
1195
index : Index ,
1176
1196
other_index : Index ,
1177
- indexer : npt .NDArray [np .intp ],
1197
+ indexer : npt .NDArray [np .intp ] | None ,
1178
1198
how : JoinHow = "left" ,
1179
1199
) -> Index :
1180
1200
"""
1181
1201
Create a join index by rearranging one index to match another
1182
1202
1183
1203
Parameters
1184
1204
----------
1185
- index : Index being rearranged
1186
- other_index : Index used to supply values not found in index
1187
- indexer : np.ndarray[np.intp] how to rearrange index
1205
+ index : Index
1206
+ index being rearranged
1207
+ other_index : Index
1208
+ used to supply values not found in index
1209
+ indexer : np.ndarray[np.intp] or None
1210
+ how to rearrange index
1188
1211
how : str
1189
1212
Replacement is only necessary if indexer based on other_index.
1190
1213
@@ -1202,6 +1225,8 @@ def _create_join_index(
1202
1225
if np .any (mask ):
1203
1226
fill_value = na_value_for_dtype (index .dtype , compat = False )
1204
1227
index = index .append (Index ([fill_value ]))
1228
+ if indexer is None :
1229
+ return index .copy ()
1205
1230
return index .take (indexer )
1206
1231
1207
1232
@final
@@ -1660,7 +1685,7 @@ def get_join_indexers(
1660
1685
right_keys : list [ArrayLike ],
1661
1686
sort : bool = False ,
1662
1687
how : JoinHow = "inner" ,
1663
- ) -> tuple [npt .NDArray [np .intp ], npt .NDArray [np .intp ]]:
1688
+ ) -> tuple [npt .NDArray [np .intp ] | None , npt .NDArray [np .intp ] | None ]:
1664
1689
"""
1665
1690
1666
1691
Parameters
@@ -1672,9 +1697,9 @@ def get_join_indexers(
1672
1697
1673
1698
Returns
1674
1699
-------
1675
- np.ndarray[np.intp]
1700
+ np.ndarray[np.intp] or None
1676
1701
Indexer into the left_keys.
1677
- np.ndarray[np.intp]
1702
+ np.ndarray[np.intp] or None
1678
1703
Indexer into the right_keys.
1679
1704
"""
1680
1705
assert len (left_keys ) == len (
@@ -1695,37 +1720,77 @@ def get_join_indexers(
1695
1720
elif not sort and how in ["left" , "outer" ]:
1696
1721
return _get_no_sort_one_missing_indexer (left_n , False )
1697
1722
1698
- # get left & right join labels and num. of levels at each location
1699
- mapped = (
1700
- _factorize_keys (left_keys [n ], right_keys [n ], sort = sort )
1701
- for n in range (len (left_keys ))
1702
- )
1703
- zipped = zip (* mapped )
1704
- llab , rlab , shape = (list (x ) for x in zipped )
1723
+ lkey : ArrayLike
1724
+ rkey : ArrayLike
1725
+ if len (left_keys ) > 1 :
1726
+ # get left & right join labels and num. of levels at each location
1727
+ mapped = (
1728
+ _factorize_keys (left_keys [n ], right_keys [n ], sort = sort )
1729
+ for n in range (len (left_keys ))
1730
+ )
1731
+ zipped = zip (* mapped )
1732
+ llab , rlab , shape = (list (x ) for x in zipped )
1705
1733
1706
- # get flat i8 keys from label lists
1707
- lkey , rkey = _get_join_keys (llab , rlab , tuple (shape ), sort )
1734
+ # get flat i8 keys from label lists
1735
+ lkey , rkey = _get_join_keys (llab , rlab , tuple (shape ), sort )
1736
+ else :
1737
+ lkey = left_keys [0 ]
1738
+ rkey = right_keys [0 ]
1708
1739
1709
- # factorize keys to a dense i8 space
1710
- # `count` is the num. of unique keys
1711
- # set(lkey) | set(rkey) == range(count)
1740
+ left = Index (lkey )
1741
+ right = Index (rkey )
1712
1742
1713
- lkey , rkey , count = _factorize_keys (lkey , rkey , sort = sort )
1714
- # preserve left frame order if how == 'left' and sort == False
1715
- kwargs = {}
1716
- if how in ("inner" , "left" , "right" ):
1717
- kwargs ["sort" ] = sort
1718
- join_func = {
1719
- "inner" : libjoin .inner_join ,
1720
- "left" : libjoin .left_outer_join ,
1721
- "right" : lambda x , y , count , ** kwargs : libjoin .left_outer_join (
1722
- y , x , count , ** kwargs
1723
- )[::- 1 ],
1724
- "outer" : libjoin .full_outer_join ,
1725
- }[how ]
1726
-
1727
- # error: Cannot call function of unknown type
1728
- return join_func (lkey , rkey , count , ** kwargs ) # type: ignore[operator]
1743
+ if (
1744
+ left .is_monotonic_increasing
1745
+ and right .is_monotonic_increasing
1746
+ and (left .is_unique or right .is_unique )
1747
+ ):
1748
+ _ , lidx , ridx = left .join (right , how = how , return_indexers = True , sort = sort )
1749
+ else :
1750
+ lidx , ridx = get_join_indexers_non_unique (
1751
+ left ._values , right ._values , sort , how
1752
+ )
1753
+
1754
+ if lidx is not None and is_range_indexer (lidx , len (left )):
1755
+ lidx = None
1756
+ if ridx is not None and is_range_indexer (ridx , len (right )):
1757
+ ridx = None
1758
+ return lidx , ridx
1759
+
1760
+
1761
def get_join_indexers_non_unique(
    left: ArrayLike,
    right: ArrayLike,
    sort: bool = False,
    how: JoinHow = "inner",
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    """
    Get join indexers for left and right.

    The keys are factorized with ``_factorize_keys`` and the resulting
    codes are passed to the ``libjoin`` routine that matches ``how``.

    Parameters
    ----------
    left : ArrayLike
        Join keys for the left side.
    right : ArrayLike
        Join keys for the right side.
    sort : bool, default False
        Forwarded to ``_factorize_keys`` and, for every ``how`` except
        ``"outer"``, to the ``libjoin`` routine.
    how : {'inner', 'outer', 'left', 'right'}, default 'inner'
        Type of join to perform.

    Returns
    -------
    np.ndarray[np.intp]
        Indexer into left.
    np.ndarray[np.intp]
        Indexer into right.

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported join types.
    """
    # Validate before factorizing: with an unsupported ``how`` none of the
    # branches below would bind ``lidx``/``ridx`` and the ``return`` would
    # fail with an UnboundLocalError instead of a meaningful message.
    if how not in ("left", "right", "inner", "outer"):
        raise ValueError(
            f"how must be one of 'left', 'right', 'inner', 'outer'; got {how!r}"
        )

    lkey, rkey, count = _factorize_keys(left, right, sort=sort)
    if how == "left":
        lidx, ridx = libjoin.left_outer_join(lkey, rkey, count, sort=sort)
    elif how == "right":
        # A right join is computed as a left join with the operands swapped,
        # so the returned pair is unpacked in swapped order as well.
        ridx, lidx = libjoin.left_outer_join(rkey, lkey, count, sort=sort)
    elif how == "inner":
        lidx, ridx = libjoin.inner_join(lkey, rkey, count, sort=sort)
    else:
        # how == "outer"; full_outer_join is called without ``sort``.
        lidx, ridx = libjoin.full_outer_join(lkey, rkey, count)
    return lidx, ridx
1729
1794
1730
1795
1731
1796
def restore_dropped_levels_multijoin (
@@ -1860,7 +1925,10 @@ def get_result(self, copy: bool | None = True) -> DataFrame:
1860
1925
left_indexer = cast ("npt.NDArray[np.intp]" , left_indexer )
1861
1926
right_indexer = cast ("npt.NDArray[np.intp]" , right_indexer )
1862
1927
left_join_indexer = libjoin .ffill_indexer (left_indexer )
1863
- right_join_indexer = libjoin .ffill_indexer (right_indexer )
1928
+ if right_indexer is None :
1929
+ right_join_indexer = None
1930
+ else :
1931
+ right_join_indexer = libjoin .ffill_indexer (right_indexer )
1864
1932
elif self .fill_method is None :
1865
1933
left_join_indexer = left_indexer
1866
1934
right_join_indexer = right_indexer
0 commit comments