From 6798bf727f6b2af3ccda534b78e43b920a84a666 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Sun, 3 Dec 2017 14:50:45 +0100 Subject: [PATCH 1/3] Make code more pythonic and avoid modification of meta if mutable --- pandas/io/json/normalize.py | 6 ++---- pandas/tests/io/json/test_normalize.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index d062e4f2830ff..595031b04e367 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -181,7 +181,7 @@ def _pull_field(js, spec): return result - if isinstance(data, list) and len(data) is 0: + if isinstance(data, list) and not data: return DataFrame() # A bit of a hackjob @@ -207,9 +207,7 @@ def _pull_field(js, spec): elif not isinstance(meta, list): meta = [meta] - for i, x in enumerate(meta): - if not isinstance(x, list): - meta[i] = [x] + meta = [m if isinstance(m, list) else [m] for m in meta] # Disastrously inefficient for now records = [] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 49b765b18d623..d95c7565538de 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -173,6 +173,20 @@ def test_meta_name_conflict(self): for val in ['metafoo', 'metabar', 'foo', 'bar']: assert val in result + def test_meta_parameter_not_modified(self): + data = [{'foo': 'hello', + 'bar': 'there', + 'data': [{'foo': 'something', 'bar': 'else'}, + {'foo': 'something2', 'bar': 'else2'}]}] + + COLUMNS = ['foo', 'bar'] + result = json_normalize(data, 'data', meta=COLUMNS, + meta_prefix='meta') + + assert COLUMNS == ['foo', 'bar'] + for val in ['metafoo', 'metabar', 'foo', 'bar']: + assert val in result + def test_record_prefix(self, state_data): result = json_normalize(state_data[0], 'counties') expected = DataFrame(state_data[0]['counties']) From 23a203e5f33820eeff249c3a64309acb65c908e0 Mon Sep 17 00:00:00 2001 From: David Fischer 
Date: Sun, 3 Dec 2017 17:57:50 +0100 Subject: [PATCH 2/3] Update whatsnew + add ref in test --- doc/source/whatsnew/v0.21.1.txt | 1 + pandas/tests/io/json/test_normalize.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 3d4850b334ff9..f736fdc411890 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -65,6 +65,7 @@ Conversion - Bug in :meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) - Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising``TypeError` (:issue:`18372`) - Bug in :class:`DateTimeIndex` and :meth:`date_range` where mismatching tz-aware ``start`` and ``end`` timezones would not raise an err if ``end.tzinfo`` is None (:issue:`18431`) +- Bug in :func:`pandas.io.json.json_normalize` Make code more Pythonic and avoid modification of meta if mutable (:issue:`18610`) - Indexing diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index d95c7565538de..1cceae32cd748 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -174,6 +174,7 @@ def test_meta_name_conflict(self): assert val in result def test_meta_parameter_not_modified(self): + # GH 18610 data = [{'foo': 'hello', 'bar': 'there', 'data': [{'foo': 'something', 'bar': 'else'}, From 79892a0d6c358cd1a9dea69827251e69e273128b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 4 Dec 2017 07:55:16 -0500 Subject: [PATCH 3/3] doc fix --- doc/source/whatsnew/v0.21.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index f736fdc411890..a9608594be547 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -65,7 +65,6 @@ Conversion - Bug in 
:meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) - Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising``TypeError` (:issue:`18372`) - Bug in :class:`DateTimeIndex` and :meth:`date_range` where mismatching tz-aware ``start`` and ``end`` timezones would not raise an err if ``end.tzinfo`` is None (:issue:`18431`) -- Bug in :func:`pandas.io.json.json_normalize` Make code more Pythonic and avoid modification of meta if mutable (:issue:`18610`) - Indexing @@ -91,6 +90,7 @@ I/O - Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`). - Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`) - Bug in :func:`read_json` not decoding when reading line deliminted JSON from S3 (:issue:`17200`) +- Bug in :func:`pandas.io.json.json_normalize` where a list passed as the ``meta`` argument was modified in place (:issue:`18610`) Plotting ^^^^^^^^