Skip to content

Commit 7a9e834

Browse files
committed
Cleaned up code for table orient in read_json
1 parent 29d3ca7 commit 7a9e834

File tree

3 files changed

+160
-105
lines changed

3 files changed

+160
-105
lines changed

doc/source/whatsnew/v0.23.0.txt

+45-1
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,51 @@ Current Behavior
145145

146146
s.rank(na_option='top')
147147

148+
.. _whatsnew_0230.enhancements.round-trippable_json:
149+
150+
JSON read/write round-trippable with ``orient='table'``
151+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
152+
153+
A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata.
154+
155+
.. ipython:: python
156+
157+
df = pd.DataFrame({'foo': [1, 2, 3, 4],
158+
'bar': ['a', 'b', 'c', 'd'],
159+
'baz': pd.date_range('2018-01-01', freq='d', periods=4),
160+
'qux': pd.Categorical(['a', 'b', 'c', 'c'])
161+
}, index=pd.Index(range(4), name='idx'))
162+
df
163+
164+
Previous Behavior:
165+
166+
.. code-block:: ipython
167+
168+
In [17]: df.to_json("test.json", orient='columns')
169+
In [18]: pd.read_json("test.json", orient='columns')
170+
Out[18]:
171+
bar baz foo qux
172+
0 a 1514764800000 1 a
173+
1 b 1514851200000 2 b
174+
2 c 1514937600000 3 c
175+
3 d 1515024000000 4 c
176+
177+
Current Behavior:
178+
179+
.. code-block:: ipython
180+
181+
In [29]: df.to_json("test.json", orient='table')
182+
In [30]: pd.read_json("test.json", orient='table')
183+
Out[30]:
184+
bar baz foo qux
185+
idx
186+
0 a 2018-01-01 1 a
187+
1 b 2018-01-02 2 b
188+
2 c 2018-01-03 3 c
189+
3 d 2018-01-04 4 c
190+
191+
Please note that the string ``index`` is not supported as an index name with the round trip format, as it is used by default in ``to_json`` to indicate a missing index name.
192+
148193
.. _whatsnew_0230.enhancements.other:
149194

150195
Other Enhancements
@@ -171,7 +216,6 @@ Other Enhancements
171216
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
172217
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
173218
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
174-
- :func:`read_json` now supports ``table`` as a value to the ``orient`` argument (:issue:`18912`)
175219

176220
.. _whatsnew_0230.api_breaking:
177221

pandas/io/json/table_schema.py

+52-13
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def set_default_names(data):
8080
return data
8181

8282

83-
def make_field(arr, dtype=None):
83+
def convert_pandas_type_to_json_field(arr, dtype=None):
8484
dtype = dtype or arr.dtype
8585
if arr.name is None:
8686
name = 'values'
@@ -108,8 +108,8 @@ def make_field(arr, dtype=None):
108108
return field
109109

110110

111-
def revert_field(field):
112-
'''
111+
def convert_json_field_to_pandas_type(field):
112+
"""
113113
Converts a JSON field descriptor into its corresponding NumPy / pandas type
114114
115115
Parameters
@@ -120,9 +120,35 @@ def revert_field(field):
120120
Returns
121121
-------
122122
dtype
123-
'''
123+
124+
Raises
125+
------
126+
ValueError
127+
If the type of the provided field is unknown or currently unsupported
128+
129+
Examples
130+
--------
131+
>>> convert_json_field_to_pandas_type({'name': 'an_int',
132+
'type': 'integer'})
133+
'int64'
134+
>>> convert_json_field_to_pandas_type({'name': 'a_categorical',
135+
'type': 'any',
136+
'constraints': {'enum': [
137+
'a', 'b', 'c']},
138+
'ordered': True})
139+
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
140+
>>> convert_json_field_to_pandas_type({'name': 'a_datetime',
141+
'type': 'datetime'})
142+
'datetime64[ns]'
143+
>>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
144+
'type': 'datetime',
145+
'tz': 'US/Central'})
146+
'datetime64[ns, US/Central]'
147+
"""
124148
typ = field['type']
125-
if typ == 'integer':
149+
if typ == 'string':
150+
return 'object'
151+
elif typ == 'integer':
126152
return 'int64'
127153
elif typ == 'number':
128154
return 'float64'
@@ -139,7 +165,10 @@ def revert_field(field):
139165
if 'constraints' in field and 'ordered' in field:
140166
return CategoricalDtype(categories=field['constraints']['enum'],
141167
ordered=field['ordered'])
142-
return 'object'
168+
else:
169+
return 'object'
170+
171+
raise ValueError("Unsupported or invalid field type: {}".format(typ))
143172

144173

145174
def build_table_schema(data, index=True, primary_key=None, version=True):
@@ -197,15 +226,15 @@ def build_table_schema(data, index=True, primary_key=None, version=True):
197226
if index:
198227
if data.index.nlevels > 1:
199228
for level in data.index.levels:
200-
fields.append(make_field(level))
229+
fields.append(convert_pandas_type_to_json_field(level))
201230
else:
202-
fields.append(make_field(data.index))
231+
fields.append(convert_pandas_type_to_json_field(data.index))
203232

204233
if data.ndim > 1:
205234
for column, s in data.iteritems():
206-
fields.append(make_field(s))
235+
fields.append(convert_pandas_type_to_json_field(s))
207236
else:
208-
fields.append(make_field(data))
237+
fields.append(convert_pandas_type_to_json_field(data))
209238

210239
schema['fields'] = fields
211240
if index and data.index.is_unique and primary_key is None:
@@ -242,6 +271,13 @@ def parse_table_schema(json, precise_float):
242271
NotImplementedError
243272
If the JSON table schema contains either timezone or timedelta data
244273
274+
Notes
275+
-----
276+
Because ``to_json`` uses the string `index` to denote a name-less
277+
``Index``, this function sets the index name of the returned ``DataFrame`` to
278+
``None`` when said string is encountered. Therefore, intentional usage
279+
of `index` as the ``Index`` name is not supported.
280+
245281
See also
246282
--------
247283
build_table_schema : inverse function
@@ -251,7 +287,7 @@ def parse_table_schema(json, precise_float):
251287
col_order = [field['name'] for field in table['schema']['fields']]
252288
df = DataFrame(table['data'])[col_order]
253289

254-
dtypes = {field['name']: revert_field(field)
290+
dtypes = {field['name']: convert_json_field_to_pandas_type(field)
255291
for field in table['schema']['fields']}
256292

257293
# Cannot directly use as_type with timezone data on object; raise for now
@@ -267,7 +303,10 @@ def parse_table_schema(json, precise_float):
267303
df = df.astype(dtypes)
268304

269305
df = df.set_index(table['schema']['primaryKey'])
270-
if all(x.startswith('level_') for x in df.index.names):
271-
df.index.names = [None] * len(df.index.names)
306+
if len(df.index.names) == 1 and df.index.name == 'index':
307+
df.index.name = None
308+
else:
309+
if all(x.startswith('level_') for x in df.index.names):
310+
df.index.names = [None] * len(df.index.names)
272311

273312
return df

0 commit comments

Comments
 (0)