From ecf04f11f23efe1f1f6562df27e680cbaf34f0fa Mon Sep 17 00:00:00 2001
From: Artem Vorobyev <artem.vorobyev@finn.auto>
Date: Tue, 18 Apr 2023 16:53:03 +0200
Subject: [PATCH 1/7] DF join cow tests

---
 pandas/tests/copy_view/test_functions.py | 90 ++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index 53d72baf7da4e..caa9b700a69bd 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -310,3 +310,93 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
     else:
         assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
         assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
+
+
+def test_join_on_key(using_copy_on_write):
+    """Test if DataFrame.join applies Copy-On-Write optimization.
+
+    GIVEN two DataFrame instances
+    WHEN DataFrame.join is called for one of them
+    THEN check that the result DataFrame instance
+        shares the same memory with original dataframes until it is edited.
+    """
+    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
+    df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
+    df1_orig = df1.copy()
+    df2_orig = df2.copy()
+
+    result = df1.join(df2.set_index("key"), on="key")
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
+        assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
+        assert np.shares_memory(get_array(result, "key"), get_array(df1, "key"))
+        assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key"))
+    else:
+        assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
+        assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
+
+    result.iloc[0, 1] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
+        assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
+
+    result.iloc[0, 2] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
+    tm.assert_frame_equal(df1, df1_orig)
+    tm.assert_frame_equal(df2, df2_orig)
+
+
+def test_join_multiple_dataframes_on_key(using_copy_on_write):
+    """Test if DataFrame.join applies Copy-On-Write optimization.
+
+    GIVEN a DataFrame instance and a list of DataFrame instances to be joined
+    WHEN DataFrame.join is called for original DataFrame instance
+    THEN check that the result DataFrame instance
+        shares the same memory with original dataframes until it is edited.
+    """
+    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]}).set_index("key")
+    dfs_list = [
+        DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]}).set_index("key"),
+        DataFrame({"key": ["a", "b", "c"], "c": [7, 8, 9]}).set_index("key"),
+    ]
+    df1_orig = df1.copy()
+    dfs_list_orig = [df.copy() for df in dfs_list]
+
+    result = df1.join(dfs_list)
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
+        assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
+        assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
+        assert np.shares_memory(get_array(result.index), get_array(df1.index))
+        assert not np.shares_memory(
+            get_array(result.index), get_array(dfs_list[0].index)
+        )
+        assert not np.shares_memory(
+            get_array(result.index), get_array(dfs_list[1].index)
+        )
+    else:
+        assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
+        assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
+        assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
+
+    result.iloc[0, 0] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
+        assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
+        assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
+
+    result.iloc[0, 1] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
+        assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
+
+    result.iloc[0, 2] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
+
+    tm.assert_frame_equal(df1, df1_orig)
+    for df, df_orig in zip(dfs_list, dfs_list_orig):
+        tm.assert_frame_equal(df, df_orig)

From 14970640c94fbaa215137f182d98dcf40cd2c4f4 Mon Sep 17 00:00:00 2001
From: SecretLake <artem.vorobyev.ev@gmail.com>
Date: Thu, 20 Apr 2023 13:19:55 +0000
Subject: [PATCH 2/7] PR feedback

---
 pandas/tests/copy_view/test_functions.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index caa9b700a69bd..a59d800e6bb06 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -3,6 +3,7 @@
 
 from pandas import (
     DataFrame,
+    Index,
     Series,
     concat,
     merge,
@@ -313,13 +314,6 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
 
 
 def test_join_on_key(using_copy_on_write):
-    """Test if DataFrame.join applies Copy-On-Write optimization.
-
-    GIVEN two DataFrame instances
-    WHEN DataFrame.join is called for one of them
-    THEN check that the result DataFrame instance
-        shares the same memory with original dataframes until it is edited.
-    """
     df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
     df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
     df1_orig = df1.copy()
@@ -349,17 +343,11 @@ def test_join_on_key(using_copy_on_write):
 
 
 def test_join_multiple_dataframes_on_key(using_copy_on_write):
-    """Test if DataFrame.join applies Copy-On-Write optimization.
-
-    GIVEN a DataFrame instance and a list of DataFrame instances to be joined
-    WHEN DataFrame.join is called for original DataFrame instance
-    THEN check that the result DataFrame instance
-        shares the same memory with original dataframes until it is edited.
-    """
-    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]}).set_index("key")
+    df_index = Index(["a", "b", "c"], name="key")
+    df1 = DataFrame({"a": [1, 2, 3]}, index=df_index)
     dfs_list = [
-        DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]}).set_index("key"),
-        DataFrame({"key": ["a", "b", "c"], "c": [7, 8, 9]}).set_index("key"),
+        DataFrame({"b": [4, 5, 6]}, index=df_index),
+        DataFrame({"c": [7, 8, 9]}, index=df_index),
     ]
     df1_orig = df1.copy()
     dfs_list_orig = [df.copy() for df in dfs_list]

From 4c6c1bf82a3d743a771b0c794bb43de00372b420 Mon Sep 17 00:00:00 2001
From: SecretLake <artem.vorobyev.ev@gmail.com>
Date: Thu, 20 Apr 2023 13:34:52 +0000
Subject: [PATCH 3/7] Integrate PR feedback for test_join_on_key

---
 pandas/tests/copy_view/test_functions.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index a59d800e6bb06..d2debd29a125b 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -314,12 +314,15 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
 
 
 def test_join_on_key(using_copy_on_write):
-    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
-    df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
+    df_index = Index(["a", "b", "c"], name="key")
+
+    df1 = DataFrame({"a": [1, 2, 3]}, index=df_index)
+    df2 = DataFrame({"b": [4, 5, 6]}, index=df_index)
+
     df1_orig = df1.copy()
     df2_orig = df2.copy()
 
-    result = df1.join(df2.set_index("key"), on="key")
+    result = df1.join(df2, on="key")
 
     if using_copy_on_write:
         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
@@ -330,12 +333,12 @@ def test_join_on_key(using_copy_on_write):
         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
 
-    result.iloc[0, 1] = 0
+    result.loc[0, 1] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
 
-    result.iloc[0, 2] = 0
+    result.loc[0, 2] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
     tm.assert_frame_equal(df1, df1_orig)
@@ -344,11 +347,13 @@ def test_join_on_key(using_copy_on_write):
 
 def test_join_multiple_dataframes_on_key(using_copy_on_write):
     df_index = Index(["a", "b", "c"], name="key")
+
     df1 = DataFrame({"a": [1, 2, 3]}, index=df_index)
     dfs_list = [
         DataFrame({"b": [4, 5, 6]}, index=df_index),
         DataFrame({"c": [7, 8, 9]}, index=df_index),
     ]
+
     df1_orig = df1.copy()
     dfs_list_orig = [df.copy() for df in dfs_list]
 

From 967c46c2182e07d7865bc0ae60789b758bbd026f Mon Sep 17 00:00:00 2001
From: SecretLake <artem.vorobyev.ev@gmail.com>
Date: Thu, 20 Apr 2023 15:48:43 +0000
Subject: [PATCH 4/7] Fix tests

---
 pandas/tests/copy_view/test_functions.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index d2debd29a125b..07cedae3b74b0 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -314,10 +314,8 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
 
 
 def test_join_on_key(using_copy_on_write):
-    df_index = Index(["a", "b", "c"], name="key")
-
-    df1 = DataFrame({"a": [1, 2, 3]}, index=df_index)
-    df2 = DataFrame({"b": [4, 5, 6]}, index=df_index)
+    df1 = DataFrame({"a": [1, 2, 3]}, index=Index(["a", "b", "c"], name="key"))
+    df2 = DataFrame({"b": [4, 5, 6]}, index=Index(["a", "b", "c"], name="key"))
 
     df1_orig = df1.copy()
     df2_orig = df2.copy()
@@ -327,31 +325,31 @@ def test_join_on_key(using_copy_on_write):
     if using_copy_on_write:
         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
-        assert np.shares_memory(get_array(result, "key"), get_array(df1, "key"))
-        assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key"))
+        assert np.shares_memory(get_array(result.index), get_array(df1.index))
+        assert not np.shares_memory(get_array(result.index), get_array(df2.index))
     else:
         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
 
-    result.loc[0, 1] = 0
+    result.iloc[0, 0] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
 
-    result.loc[0, 2] = 0
+    result.iloc[0, 1] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
+        
     tm.assert_frame_equal(df1, df1_orig)
     tm.assert_frame_equal(df2, df2_orig)
 
 
 def test_join_multiple_dataframes_on_key(using_copy_on_write):
-    df_index = Index(["a", "b", "c"], name="key")
+    df1 = DataFrame({"a": [1, 2, 3]}, index=Index(["a", "b", "c"], name="key"))
 
-    df1 = DataFrame({"a": [1, 2, 3]}, index=df_index)
     dfs_list = [
-        DataFrame({"b": [4, 5, 6]}, index=df_index),
-        DataFrame({"c": [7, 8, 9]}, index=df_index),
+        DataFrame({"b": [4, 5, 6]}, index=Index(["a", "b", "c"], name="key")),
+        DataFrame({"c": [7, 8, 9]}, index=Index(["a", "b", "c"], name="key")),
     ]
 
     df1_orig = df1.copy()

From 481ab5e2022dab04b56b470c639388af34e6894b Mon Sep 17 00:00:00 2001
From: SecretLake <artem.vorobyev.ev@gmail.com>
Date: Thu, 20 Apr 2023 15:54:58 +0000
Subject: [PATCH 5/7] Fix pre-commit

---
 pandas/tests/copy_view/test_functions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index 07cedae3b74b0..09e44fee31c0d 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -319,7 +319,6 @@ def test_join_on_key(using_copy_on_write):
 
     df1_orig = df1.copy()
     df2_orig = df2.copy()
-
     result = df1.join(df2, on="key")
 
     if using_copy_on_write:

From c4e7f2ae26a90ff75f45a72000c8afd6aabfca16 Mon Sep 17 00:00:00 2001
From: SecretLake <artem.vorobyev.ev@gmail.com>
Date: Thu, 20 Apr 2023 15:55:39 +0000
Subject: [PATCH 6/7] Fix pre-commit

---
 pandas/tests/copy_view/test_functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index 09e44fee31c0d..d0cdb8af9b19b 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -319,6 +319,7 @@ def test_join_on_key(using_copy_on_write):
 
     df1_orig = df1.copy()
     df2_orig = df2.copy()
+
     result = df1.join(df2, on="key")
 
     if using_copy_on_write:
@@ -338,7 +339,7 @@ def test_join_on_key(using_copy_on_write):
     result.iloc[0, 1] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
-        
+
     tm.assert_frame_equal(df1, df1_orig)
     tm.assert_frame_equal(df2, df2_orig)
 

From 7f6635c27b74f376785eb787dd8af018f758d00b Mon Sep 17 00:00:00 2001
From: SecretLake <artem.vorobyev.ev@gmail.com>
Date: Thu, 20 Apr 2023 16:02:34 +0000
Subject: [PATCH 7/7] Copy index instead of setting it multiple times

---
 pandas/tests/copy_view/test_functions.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index d0cdb8af9b19b..56e4b186350f2 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -314,8 +314,10 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
 
 
 def test_join_on_key(using_copy_on_write):
-    df1 = DataFrame({"a": [1, 2, 3]}, index=Index(["a", "b", "c"], name="key"))
-    df2 = DataFrame({"b": [4, 5, 6]}, index=Index(["a", "b", "c"], name="key"))
+    df_index = Index(["a", "b", "c"], name="key")
+
+    df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
+    df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))
 
     df1_orig = df1.copy()
     df2_orig = df2.copy()
@@ -345,11 +347,12 @@ def test_join_on_key(using_copy_on_write):
 
 
 def test_join_multiple_dataframes_on_key(using_copy_on_write):
-    df1 = DataFrame({"a": [1, 2, 3]}, index=Index(["a", "b", "c"], name="key"))
+    df_index = Index(["a", "b", "c"], name="key")
 
+    df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
     dfs_list = [
-        DataFrame({"b": [4, 5, 6]}, index=Index(["a", "b", "c"], name="key")),
-        DataFrame({"c": [7, 8, 9]}, index=Index(["a", "b", "c"], name="key")),
+        DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True)),
+        DataFrame({"c": [7, 8, 9]}, index=df_index.copy(deep=True)),
     ]
 
     df1_orig = df1.copy()