@@ -1775,6 +1775,129 @@ def to_numpy(
1775
1775
1776
1776
return result
1777
1777
1778
def _to_dict_helper(self, orient, into_c, into):
    """
    Build the ``to_dict`` result for an already-normalised ``orient``.

    ``maybe_box_native`` is only applied to values coming from object-dtype
    columns (GH46470): per the original note it performs badly and is not
    necessary for non-object columns.

    Parameters
    ----------
    orient : str
        One of ``"dict"``, ``"list"``, ``"split"``, ``"tight"``,
        ``"series"``, ``"records"`` or ``"index"``.
    into_c : callable
        Called with an iterable of ``(key, value)`` pairs to build every
        mapping that is returned.
    into : object
        Forwarded unchanged to ``Series.to_dict`` for ``orient="dict"``.

    Returns
    -------
    object
        Whatever ``into_c`` returns, or a list of such objects for
        ``orient="records"``.

    Raises
    ------
    ValueError
        If ``orient`` is not recognised, or if ``orient="index"`` and the
        index is not unique.
    """
    # These two orients never inspect dtypes, so answer them before paying
    # for the dtype scan below.
    if orient == "dict":
        return into_c((k, v.to_dict(into)) for k, v in self.items())
    if orient == "series":
        return into_c((k, v) for k, v in self.items())

    # Positional mask: True where the column at that position is object
    # dtype. Matching by position rather than by label keeps duplicate
    # labels apart and still works for labels that do not compare equal to
    # themselves (e.g. NaN), which a label-based set lookup would miss.
    box_mask = [is_object_dtype(dtype) for dtype in self.dtypes]
    any_object = any(box_mask)

    def _row_values(row):
        # One row of column values as a list, boxing only object columns.
        if not any_object:
            return list(row)
        return [
            maybe_box_native(value) if boxed else value
            for boxed, value in zip(box_mask, row)
        ]

    if orient == "list":
        return into_c(
            (
                label,
                list(map(maybe_box_native, column.tolist()))
                if boxed
                else column.tolist(),
            )
            for boxed, (label, column) in zip(box_mask, self.items())
        )

    if orient in ("split", "tight"):
        entries = [
            ("index", self.index.tolist()),
            ("columns", self.columns.tolist()),
            (
                "data",
                [
                    _row_values(row)
                    for row in self.itertuples(index=False, name=None)
                ],
            ),
        ]
        if orient == "tight":
            # "tight" is "split" plus the axis names.
            entries.append(("index_names", list(self.index.names)))
            entries.append(("column_names", list(self.columns.names)))
        return into_c(tuple(entries))

    if orient == "records":
        columns = self.columns.tolist()
        rows = self.itertuples(index=False, name=None)
        if any_object:
            return [into_c(zip(columns, _row_values(row))) for row in rows]
        # No object columns: zip the raw tuples directly, no per-row copy.
        return [into_c(zip(columns, row)) for row in rows]

    if orient == "index":
        if not self.index.is_unique:
            raise ValueError("DataFrame index must be unique for orient='index'.")
        # itertuples() puts the index label first; the rest are the values.
        rows = self.itertuples(name=None)
        if any_object:
            return into_c(
                (row[0], dict(zip(self.columns, _row_values(row[1:]))))
                for row in rows
            )
        return into_c((row[0], dict(zip(self.columns, row[1:]))) for row in rows)

    raise ValueError(f"orient '{orient}' not understood")
1900
+
1778
1901
def to_dict (self , orient : str = "dict" , into = dict ):
1779
1902
"""
1780
1903
Convert the DataFrame to a dictionary.
@@ -1913,67 +2036,7 @@ def to_dict(self, orient: str = "dict", into=dict):
1913
2036
elif orient .startswith ("i" ):
1914
2037
orient = "index"
1915
2038
1916
- if orient == "dict" :
1917
- return into_c ((k , v .to_dict (into )) for k , v in self .items ())
1918
-
1919
- elif orient == "list" :
1920
- return into_c ((k , v .tolist ()) for k , v in self .items ())
1921
-
1922
- elif orient == "split" :
1923
- return into_c (
1924
- (
1925
- ("index" , self .index .tolist ()),
1926
- ("columns" , self .columns .tolist ()),
1927
- (
1928
- "data" ,
1929
- [
1930
- list (map (maybe_box_native , t ))
1931
- for t in self .itertuples (index = False , name = None )
1932
- ],
1933
- ),
1934
- )
1935
- )
1936
-
1937
- elif orient == "tight" :
1938
- return into_c (
1939
- (
1940
- ("index" , self .index .tolist ()),
1941
- ("columns" , self .columns .tolist ()),
1942
- (
1943
- "data" ,
1944
- [
1945
- list (map (maybe_box_native , t ))
1946
- for t in self .itertuples (index = False , name = None )
1947
- ],
1948
- ),
1949
- ("index_names" , list (self .index .names )),
1950
- ("column_names" , list (self .columns .names )),
1951
- )
1952
- )
1953
-
1954
- elif orient == "series" :
1955
- return into_c ((k , v ) for k , v in self .items ())
1956
-
1957
- elif orient == "records" :
1958
- columns = self .columns .tolist ()
1959
- rows = (
1960
- dict (zip (columns , row ))
1961
- for row in self .itertuples (index = False , name = None )
1962
- )
1963
- return [
1964
- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
1965
- ]
1966
-
1967
- elif orient == "index" :
1968
- if not self .index .is_unique :
1969
- raise ValueError ("DataFrame index must be unique for orient='index'." )
1970
- return into_c (
1971
- (t [0 ], dict (zip (self .columns , t [1 :])))
1972
- for t in self .itertuples (name = None )
1973
- )
1974
-
1975
- else :
1976
- raise ValueError (f"orient '{ orient } ' not understood" )
2039
+ return self ._to_dict_helper (orient , into_c , into )
1977
2040
1978
2041
def to_gbq (
1979
2042
self ,
0 commit comments