pandas-dev · nickeubank · Dec 16, 2015 · Dec 16, 2015 · Jan 13, 2016 · Sep 30, 2015
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -179,7 +179,7 @@ class DataFrame(NDFrame):
         np.arange(n) if no column labels are provided
     dtype : dtype, default None
         Data type to force, otherwise infer
-    copy : boolean, default False
+    copy : boolean, default True
         Copy data from inputs. Only affects DataFrame / 2d ndarray input
 
     Examples
@@ -211,13 +211,17 @@ def _constructor_expanddim(self):
 
     def __init__(self, data=None, index=None, columns=None, dtype=None,
                  copy=False):
+
+        parent = None
+
         if data is None:
             data = {}
         if dtype is not None:
             dtype = self._validate_dtype(dtype)
 
         if isinstance(data, DataFrame):
-            data = data._data
+            parent = data
+            data = data._get_view()._data
 
         if isinstance(data, BlockManager):
             mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
@@ -306,6 +310,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
 
         NDFrame.__init__(self, mgr, fastpath=True)
 
+        if parent is not None:
+            parent._register_new_child(self)
+
     def _init_dict(self, data, index, columns, dtype=None):
         """
         Segregate Series based on type and coerce into matrices.
@@ -1963,8 +1970,10 @@ def __getitem__(self, key):
         # shortcut if we are an actual column
         is_mi_columns = isinstance(self.columns, MultiIndex)
         try:
-            if key in self.columns and not is_mi_columns:
-                return self._getitem_column(key)
+            if key in self.columns:
+                result = self._getitem_column(key)
+                result._is_column_view = True
+                return result
         except:
             pass
 
@@ -2338,7 +2347,6 @@ def __setitem__(self, key, value):
             self._set_item(key, value)
 
     def _setitem_slice(self, key, value):
-        self._check_setitem_copy()
         self.ix._setitem_with_indexer(key, value)
 
     def _setitem_array(self, key, value):
@@ -2349,7 +2357,6 @@ def _setitem_array(self, key, value):
                                  (len(key), len(self.index)))
             key = check_bool_indexer(self.index, key)
             indexer = key.nonzero()[0]
-            self._check_setitem_copy()
             self.ix._setitem_with_indexer(indexer, value)
         else:
             if isinstance(value, DataFrame):
@@ -2359,7 +2366,6 @@ def _setitem_array(self, key, value):
                     self[k1] = value[k2]
             else:
                 indexer = self.ix._convert_to_indexer(key, axis=1)
-                self._check_setitem_copy()
                 self.ix._setitem_with_indexer((slice(None), indexer), value)
 
     def _setitem_frame(self, key, value):
@@ -2369,7 +2375,6 @@ def _setitem_frame(self, key, value):
             raise TypeError('Must pass DataFrame with boolean values only')
 
         self._check_inplace_setting(value)
-        self._check_setitem_copy()
         self.where(-key, value, inplace=True)
 
     def _ensure_valid_index(self, value):
@@ -2405,11 +2410,6 @@ def _set_item(self, key, value):
         value = self._sanitize_column(key, value)
         NDFrame._set_item(self, key, value)
 
-        # check if we are modifying a copy
-        # try to set first as we want an invalid
-        # value exeption to occur first
-        if len(self):
-            self._check_setitem_copy()
 
     def insert(self, loc, column, value, allow_duplicates=False):
         """
@@ -4377,12 +4377,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
     @Appender(_merge_doc, indents=2)
     def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
               left_index=False, right_index=False, sort=False,
-              suffixes=('_x', '_y'), copy=True, indicator=False):
+              suffixes=('_x', '_y'), indicator=False):
         from pandas.tools.merge import merge
         return merge(self, right, how=how, on=on,
                      left_on=left_on, right_on=right_on,
                      left_index=left_index, right_index=right_index, sort=sort,
-                     suffixes=suffixes, copy=copy, indicator=indicator)
+                     suffixes=suffixes, indicator=indicator)
 
     def round(self, decimals=0, out=None):
         """
@@ -5227,6 +5227,9 @@ def combineMult(self, other):
                       FutureWarning, stacklevel=2)
         return self.mul(other, fill_value=1.)
 
+    def _get_view(self):
+        return self.loc[:,:]  # full slice over both axes of self
+
 
 DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0,
                       axes_are_reversed=True, aliases={'rows': 0})

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -83,14 +83,18 @@ class NDFrame(PandasObject):
     _internal_names = ['_data', '_cacher', '_item_cache', '_cache',
                        'is_copy', '_subtyp', '_index',
                        '_default_kind', '_default_fill_value', '_metadata',
-                       '__array_struct__', '__array_interface__']
+                       '__array_struct__', '__array_interface__', '_children',
+                       '_is_column_view', '_original_parent']
     _internal_names_set = set(_internal_names)
     _accessors = frozenset([])
     _metadata = []
     is_copy = None
-
+    _is_column_view = None
+    _original_parent = None
+    _children = None
+
     def __init__(self, data, axes=None, copy=False, dtype=None,
-                 fastpath=False):
+                 fastpath=False, ):
 
         if not fastpath:
             if dtype is not None:
@@ -105,6 +109,10 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
         object.__setattr__(self, 'is_copy', None)
         object.__setattr__(self, '_data', data)
         object.__setattr__(self, '_item_cache', {})
+        object.__setattr__(self, '_children', weakref.WeakValueDictionary())
+        object.__setattr__(self, '_is_column_view', False)
+        object.__setattr__(self, '_original_parent', weakref.WeakValueDictionary())
+
 
     def _validate_dtype(self, dtype):
         """ validate the passed dtype """
@@ -469,7 +477,8 @@ def transpose(self, *args, **kwargs):
             raise TypeError('transpose() got an unexpected keyword '
                     'argument "{0}"'.format(list(kwargs.keys())[0]))
 
-        return self._constructor(new_values, **new_axes).__finalize__(self)
+        result = self._constructor(new_values, **new_axes).__finalize__(self)
+        return result.copy()
 
     def swapaxes(self, axis1, axis2, copy=True):
         """
@@ -1077,13 +1086,16 @@ def get(self, key, default=None):
         -------
         value : type of items contained in object
         """
+
         try:
             return self[key]
         except (KeyError, ValueError, IndexError):
             return default
 
     def __getitem__(self, item):
-        return self._get_item_cache(item)
+        result = self._get_item_cache(item)
+
+        return result
 
     def _get_item_cache(self, item):
         """ return the cached item, item represents a label indexer """
@@ -1177,9 +1189,6 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
                 except:
                     pass
 
-        if verify_is_copy:
-            self._check_setitem_copy(stacklevel=5, t='referant')
-
         if clear:
             self._clear_item_cache()
 
@@ -1204,9 +1213,20 @@ def _slice(self, slobj, axis=0, kind=None):
         # but only in a single-dtyped view slicable case
         is_copy = axis!=0 or result._is_view
         result._set_is_copy(self, copy=is_copy)
+
+        self._register_new_child(result)
+
         return result
 
     def _set_item(self, key, value):
+
+        # Run copy-on-write before setting: always for objects without a
+        # ``columns`` attribute, otherwise only when ``key`` is already
+        # one of the columns.
+        has_columns = hasattr(self, 'columns')
+        if not has_columns or key in self.columns:
+            self._execute_copy_on_write()
+
         self._data.set(key, value)
         self._clear_item_cache()
 
@@ -1219,104 +1239,22 @@ def _set_is_copy(self, ref=None, copy=True):
             else:
                 self.is_copy = None
 
-    def _check_is_chained_assignment_possible(self):
-        """
-        check if we are a view, have a cacher, and are of mixed type
-        if so, then force a setitem_copy check
-
-        should be called just near setting a value
-
-        will return a boolean if it we are a view and are cached, but a single-dtype
-        meaning that the cacher should be updated following setting
-        """
-        if self._is_view and self._is_cached:
-            ref = self._get_cacher()
-            if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t='referant', force=True)
-            return True
-        elif self.is_copy:
-            self._check_setitem_copy(stacklevel=4, t='referant')
-        return False
-
-    def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
-        """
-
-        Parameters
-        ----------
-        stacklevel : integer, default 4
-           the level to show of the stack when the error is output
-        t : string, the type of setting error
-        force : boolean, default False
-           if True, then force showing an error
-
-        validate if we are doing a settitem on a chained copy.
-
-        If you call this function, be sure to set the stacklevel such that the
-        user will see the error *at the level of setting*
-
-        It is technically possible to figure out that we are setting on
-        a copy even WITH a multi-dtyped pandas object. In other words, some blocks
-        may be views while other are not. Currently _is_view will ALWAYS return False
-        for multi-blocks to avoid having to handle this case.
-
-        df = DataFrame(np.arange(0,9), columns=['count'])
-        df['group'] = 'b'
-
-        # this technically need not raise SettingWithCopy if both are view (which is not
-        # generally guaranteed but is usually True
-        # however, this is in general not a good practice and we recommend using .loc
-        df.iloc[0:5]['group'] = 'a'
-
-        """
-
-        if force or self.is_copy:
-
-            value = config.get_option('mode.chained_assignment')
-            if value is None:
-                return
-
-            # see if the copy is not actually refererd; if so, then disolve
-            # the copy weakref
-            try:
-                gc.collect(2)
-                if not gc.get_referents(self.is_copy()):
-                    self.is_copy = None
-                    return
-            except:
-                pass
-
-            # we might be a false positive
-            try:
-                if self.is_copy().shape == self.shape:
-                    self.is_copy = None
-                    return
-            except:
-                pass
-
-            # a custom message
-            if isinstance(self.is_copy, string_types):
-                t = self.is_copy
-
-            elif t == 'referant':
-                t = ("\n"
-                     "A value is trying to be set on a copy of a slice from a "
-                     "DataFrame\n\n"
-                     "See the caveats in the documentation: "
-                     "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
-
-            else:
-                t = ("\n"
-                     "A value is trying to be set on a copy of a slice from a "
-                     "DataFrame.\n"
-                     "Try using .loc[row_indexer,col_indexer] = value instead\n\n"
-                     "See the caveats in the documentation: "
-                     "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
-
-            if value == 'raise':
-                raise SettingWithCopyError(t)
-            elif value == 'warn':
-                warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel)
-
+    def _execute_copy_on_write(self):
+        """Copy ``_data`` and reset ``_children`` when self is a view that is
+        not a column view, or when self has registered children."""
+        is_plain_view = self._is_view and not self._is_column_view
+        if is_plain_view or len(self._children) != 0:
+            self._data = self._data.copy()
+            self._children = weakref.WeakValueDictionary()
+
+    def _register_new_child(self, view_to_append):
+        """Record ``view_to_append`` in ``_children``, here and on any parent."""
+        self._children[id(view_to_append)] = view_to_append
+        if len(self._original_parent) == 0:
+            view_to_append._original_parent['parent'] = self
+        else:
+            self._original_parent['parent']._register_new_child(view_to_append)
+
     def __delitem__(self, key):
         """
         Delete item
@@ -2383,6 +2321,7 @@ def __finalize__(self, other, method=None, **kwargs):
         return self
 
     def __getattr__(self, name):
+
         """After regular attribute access, try looking up the name
         This allows simpler access to columns for interactive use.
         """
@@ -2405,6 +2344,10 @@ def __setattr__(self, name, value):
         # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
         # the same attribute.
 
+        if hasattr(self, 'columns'):
+            if name in self.columns:
+                self._execute_copy_on_write()
+
         try:
             object.__getattribute__(self, name)
             return object.__setattr__(self, name, value)