diff --git a/datatree/datatree.py b/datatree/datatree.py
index 35830fa9..8ccd6c8b 100644
--- a/datatree/datatree.py
+++ b/datatree/datatree.py
@@ -125,6 +125,15 @@ def ds(self, data: Union[Dataset, DataArray] = None):
             )
         if isinstance(data, DataArray):
             data = data.to_dataset()
+        if data is not None:
+            # A variable may not share its name with an existing child node.
+            # Collect the child names once instead of once per variable.
+            child_names = {child.name for child in self.children}
+            for var in data.variables:
+                if var in child_names:
+                    raise KeyError(
+                        f"Cannot add variable named {var}: node already has a child named {var}"
+                    )
         self._ds = data
 
     @property
@@ -165,6 +174,32 @@ def _init_single_datatree_node(
         obj.ds = data
         return obj
 
+    def _pre_attach(self, parent: TreeNode) -> None:
+        """
+        Method which superclass calls before setting parent, here used to prevent having two
+        children with duplicate names (or a data variable with the same name as a child).
+
+        Raises a KeyError if the parent already holds a data variable with this node's name.
+        """
+        super()._pre_attach(parent)
+        if parent.has_data and self.name in parent.ds.variables:
+            raise KeyError(
+                f"parent {parent.name} already contains a data variable named {self.name}"
+            )
+
+    def add_child(self, child: TreeNode) -> None:
+        """
+        Add a single child node below this node, without replacement.
+
+        Will raise a KeyError if either a child or data variable already exists with this name.
+        """
+        if any(existing.name == child.name for existing in self.children):
+            raise KeyError(f"Node already has a child named {child.name}")
+        if self.has_data and child.name in self.ds.variables:
+            raise KeyError(f"Node already contains a data variable named {child.name}")
+        # Both name checks passed: attach the child by pointing it at this node
+        child.parent = self
+
     def __str__(self):
         """A printable representation of the structure of this entire subtree."""
         renderer = anytree.RenderTree(self)
diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py
index df73109a..83e77710 100644
--- a/datatree/tests/test_datatree.py
+++ b/datatree/tests/test_datatree.py
@@ -90,6 +90,34 @@ def test_has_data(self):
         assert not john.has_data
 
 
+class TestVariablesChildrenNameCollisions:
+    def test_parent_already_has_variable_with_childs_name(self):
+        dt = DataNode("root", data=xr.Dataset({"a": [0], "b": 1}))
+        with pytest.raises(KeyError, match="already contains a data variable named a"):
+            DataNode("a", data=None, parent=dt)
+
+        with pytest.raises(KeyError, match="already contains a data variable named a"):
+            dt.add_child(DataNode("a", data=None))
+
+    def test_assign_when_already_child_with_variables_name(self):
+        dt = DataNode("root", data=None)
+        DataNode("a", data=None, parent=dt)
+        with pytest.raises(KeyError, match="already has a child named a"):
+            dt.ds = xr.Dataset({"a": 0})
+
+        dt.ds = xr.Dataset()
+        with pytest.raises(KeyError, match="already has a child named a"):
+            dt.ds = dt.ds.assign(a=xr.DataArray(0))
+
+    @pytest.mark.xfail
+    def test_update_when_already_child_with_variables_name(self):
+        # See issue #38
+        dt = DataNode("root", data=None)
+        DataNode("a", data=None, parent=dt)
+        with pytest.raises(KeyError, match="already has a child named a"):
+            dt.ds["a"] = xr.DataArray(0)
+
+
 class TestGetItems:
     def test_get_node(self):
         folder1 = DataNode("folder1")