From 5c8c2ecee77b2bee1d7cd946fa5a1cf8e018bc3b Mon Sep 17 00:00:00 2001 From: Itay Azolay Date: Tue, 22 Aug 2023 10:58:43 +0000 Subject: [PATCH 1/7] Fix issue #54654 on pickle roundtrip astype(str) might change original array even when copy is True --- pandas/_libs/lib.pyx | 2 +- pandas/tests/copy_view/test_astype.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 2681115bbdcfb..adb6f5179384a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -775,7 +775,7 @@ cpdef ndarray[object] ensure_string_array( result = np.asarray(arr, dtype="object") - if copy and result is arr: + if copy and (result is arr or np.may_share_memory(arr, result)): result = result.copy() elif not copy and result is arr: already_copied = False diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 4b751ad452ec4..e224c2919e664 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -1,3 +1,5 @@ +import pickle + import numpy as np import pytest @@ -131,6 +133,15 @@ def test_astype_string_and_object_update_original( tm.assert_frame_equal(df2, df_orig) +def test_astype_string_copy_on_pickle_roundrip(): + # https://github.com/pandas-dev/pandas/issues/54654 + # ensure_string_array may alter array inplace + base = Series(np.array([(1, 2), None, 1], dtype="object")) + base_copy = pickle.loads(pickle.dumps(base)) + base_copy.astype(str) + tm.assert_series_equal(base, base_copy) + + def test_astype_dict_dtypes(using_copy_on_write): df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")} From d2f25c240974b65d75bc60f46ea5d612d5d1a7d2 Mon Sep 17 00:00:00 2001 From: Itay Azolay Date: Tue, 22 Aug 2023 11:03:47 +0000 Subject: [PATCH 2/7] changelog --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index d8b63a6d1395d..284e4081a145c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -145,7 +145,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- When :meth:`.astype(str)` was called on an array that was loaded from pickle, the array might change inplace (:issue:`54654`) - Strings From 3e39fed6feb76d775835591fbee8cd8845344832 Mon Sep 17 00:00:00 2001 From: Itayazolay Date: Wed, 23 Aug 2023 09:00:28 +0300 Subject: [PATCH 3/7] Update v2.2.0.rst rephrase --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 284e4081a145c..49b09668d4045 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -145,7 +145,7 @@ Numeric Conversion ^^^^^^^^^^ -- When :meth:`.astype(str)` was called on an array that was loaded from pickle, the array might change inplace (:issue:`54654`) +- Bug in :func:`astype` when called with `str` on unpickled array - the array might change in-place (:issue:`54654`) - Strings From 4a552562043b1c2d27163d951885df787a37909d Mon Sep 17 00:00:00 2001 From: Itay Azolay Date: Wed, 23 Aug 2023 08:01:34 +0000 Subject: [PATCH 4/7] rephrase --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 0c6b7fdc25bf3..1e8b0bb6949a0 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -188,7 +188,7 @@ Numeric Conversion ^^^^^^^^^^ -- When :meth:`.astype(str)` was called on an array that was loaded from pickle, the array might change inplace (:issue:`54654`) +- Bug in :func:`astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`) - Strings From 47ee1beec5c74a93d77cd0e5463927cc661df62f Mon Sep 17 00:00:00 2001 From: Itayazolay Date: Tue, 19 Sep 2023 11:46:59 +0300 Subject: [PATCH 5/7] Update lib.pyx add gh comment --- pandas/_libs/lib.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index adb6f5179384a..2bf5d41ec27fc 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -776,6 +776,7 @@ cpdef ndarray[object] ensure_string_array( result = np.asarray(arr, dtype="object") if copy and (result is arr or np.may_share_memory(arr, result)): + # GH#54654 result = result.copy() elif not copy and result is arr: already_copied = False From 2e78a4ff46de14b51da8e7a1b65d2737ef59f3f9 Mon Sep 17 00:00:00 2001 From: Itayazolay Date: Fri, 20 Oct 2023 23:32:11 +0300 Subject: [PATCH 6/7] Update v2.2.0.rst --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index a46ca040ef22a..6f1d698b81f98 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -325,8 +325,8 @@ Numeric Conversion ^^^^^^^^^^ -- Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`) - Bug in :func:`astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`) +- Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`) - Strings From 622a724db4fbdc4b2ecabdf82b7da30a618d051e Mon Sep 17 00:00:00 2001 From: Itayazolay Date: Mon, 23 Oct 2023 17:04:05 +0300 Subject: [PATCH 7/7] Update lib.pyx fix CR --- pandas/_libs/lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f8ca124331e6e..bd6534494d973 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -792,7 +792,7 @@ cpdef ndarray[object] ensure_string_array( result = np.asarray(arr, dtype="object") - if copy and (result is arr or np.may_share_memory(arr, result)): + if copy and (result is arr or np.shares_memory(arr, result)): # GH#54654 result = result.copy() elif not copy and result is arr: