From 4b7e367a60d18e1af6acc4f14c41257625a45904 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 31 Jul 2024 02:04:19 -0400 Subject: [PATCH 01/22] add tests --- xarray/tests/test_datatree.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 31d77ca17e7..6711212a5b1 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -35,6 +35,18 @@ def test_bad_names(self): class TestFamilyTree: + def test_dont_modify_parent_inplace(self): + # GH issue 9196 + root = DataTree() + DataTree(name="child", parent=root) + assert root.children == {} + + def test_dont_modify_children_inplace(self): + # GH issue 9196 + child = DataTree() + DataTree(children={"child": child}) + assert child.parent is None + def test_setparent_unnamed_child_node_fails(self): john: DataTree = DataTree(name="john") with pytest.raises(ValueError, match="unnamed"): From 6c56b12d4d62f8549824cd698fcd5991c5205eaa Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 31 Jul 2024 02:04:57 -0400 Subject: [PATCH 02/22] fix by shallow copying --- xarray/core/datatree.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 65ff8667cb7..6167e175e35 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -463,8 +463,10 @@ def __init__( super().__init__(name=name) self._set_node_data(_coerce_to_dataset(data)) - self.parent = parent - self.children = children + + # shallow copy to avoid modifying arguments in-place (see GH issue #9196) + self.parent = parent.copy() if parent is not None else None + self.children = {name: child.copy() for name, child in children.items()} def _set_node_data(self, ds: Dataset): data_vars, coord_vars = _collect_data_and_coord_variables(ds) @@ -770,7 +772,7 @@ def _replace_node( if data is not _default: self._set_node_data(ds) - self._children = children + self.children = children def copy( self: DataTree, From 5db25bd19246d282e6b3a348bacded0f8dd40369 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 2 Aug 2024 15:02:25 -0400 Subject: [PATCH 03/22] correct first few tests --- xarray/tests/test_datatree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 6711212a5b1..5689cfa57ae 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -82,7 +82,7 @@ class TestNames: def test_child_gets_named_on_attach(self): sue: DataTree = DataTree() mary: DataTree = DataTree(children={"Sue": sue}) # noqa - assert sue.name == "Sue" + assert mary.children["Sue"].name == "Sue" class TestPaths: @@ -90,14 +90,14 @@ def test_path_property(self): sue: DataTree = DataTree() mary: DataTree = DataTree(children={"Sue": sue}) john: DataTree = DataTree(children={"Mary": mary}) - assert sue.path == "/Mary/Sue" + assert john.children["Mary"].children["Sue"].path == "/Mary/Sue" assert john.path == "/" def test_path_roundtrip(self): sue: DataTree = DataTree() mary: DataTree = DataTree(children={"Sue": sue}) john: DataTree = DataTree(children={"Mary": mary}) - assert john[sue.path] is sue + assert john["/Mary/Sue"].name == "Sue" def test_same_tree(self): mary: DataTree = DataTree() From b28cb0caaac82e91e443f246757bc8cb4fd58b30 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 20 Aug 2024 11:14:31 -0400 Subject: [PATCH 04/22] replace constructors in tests with DataTree.from_dict --- xarray/tests/test_datatree.py | 86 +++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 5689cfa57ae..bb671a4aaad 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -87,32 +87,42 @@ def test_child_gets_named_on_attach(self): class TestPaths: def test_path_property(self): - sue: DataTree = DataTree() - mary: DataTree = DataTree(children={"Sue": sue}) - john: DataTree = DataTree(children={"Mary": mary}) - assert john.children["Mary"].children["Sue"].path == "/Mary/Sue" + john = DataTree.from_dict( + { + "/Mary/Sue": DataTree(), + } + ) + assert john["/Mary/Sue"].path == "/Mary/Sue" assert john.path == "/" def test_path_roundtrip(self): - sue: DataTree = DataTree() - mary: DataTree = DataTree(children={"Sue": sue}) - john: DataTree = DataTree(children={"Mary": mary}) + john = DataTree.from_dict( + { + "/Mary/Sue": DataTree(), + } + ) assert john["/Mary/Sue"].name == "Sue" def test_same_tree(self): - mary: DataTree = DataTree() - kate: DataTree = DataTree() - john: DataTree = DataTree(children={"Mary": mary, "Kate": kate}) # noqa - assert mary.same_tree(kate) + john = DataTree.from_dict( + { + "/Mary": DataTree(), + "/Kate": DataTree(), + } + ) + assert john["/Mary"].same_tree(john["/Kate"]) def test_relative_paths(self): - sue: DataTree = DataTree() - mary: DataTree = DataTree(children={"Sue": sue}) - annie: DataTree = DataTree() - john: DataTree = DataTree(children={"Mary": mary, "Annie": annie}) + john = DataTree.from_dict( + { + "/Mary/Sue": DataTree(), + "/Annie": DataTree(), + } + ) + sue = john["Mary/Sue"] + annie = john["Annie"] - result = sue.relative_to(john) - assert result == "Mary/Sue" + assert sue.relative_to(john) == "Mary/Sue" assert john.relative_to(sue) == "../.." assert annie.relative_to(sue) == "../../Annie" assert sue.relative_to(annie) == "../Mary/Sue" @@ -185,8 +195,12 @@ def test_parent_already_has_variable_with_childs_name(self): DataTree(name="a", data=None, parent=dt) def test_assign_when_already_child_with_variables_name(self): - dt: DataTree = DataTree(data=None) - DataTree(name="a", data=None, parent=dt) + dt = DataTree.from_dict( + { + "/a": DataTree(), + } + ) + with pytest.raises(ValueError, match="node already contains a variable"): dt.ds = xr.Dataset({"a": 0}) # type: ignore[assignment] @@ -202,11 +216,14 @@ class TestGet: ... class TestGetItem: def test_getitem_node(self): - folder1: DataTree = DataTree(name="folder1") - results: DataTree = DataTree(name="results", parent=folder1) - highres: DataTree = DataTree(name="highres", parent=results) - assert folder1["results"] is results - assert folder1["results/highres"] is highres + folder1 = DataTree.from_dict( + { + "/results/highres": DataTree(), + } + ) + + assert folder1["results"].name == "results" + assert folder1["results/highres"].name == "highres" def test_getitem_self(self): dt: DataTree = DataTree() @@ -219,9 +236,11 @@ def test_getitem_single_data_variable(self): def test_getitem_single_data_variable_from_node(self): data = xr.Dataset({"temp": [0, 50]}) - folder1: DataTree = DataTree(name="folder1") - results: DataTree = DataTree(name="results", parent=folder1) - DataTree(name="highres", parent=results, data=data) + folder1 = DataTree.from_dict( + { + "/results/highres": data, + } + ) assert_identical(folder1["results/highres/temp"], data["temp"]) def test_getitem_nonexistent_node(self): @@ -401,14 +420,13 @@ def test_setitem_unnamed_child_node_becomes_named(self): assert john2["sonny"].name == "sonny" def test_setitem_new_grandchild_node(self): - john: DataTree = DataTree(name="john") - mary: DataTree = DataTree(name="mary", parent=john) - rose: DataTree = DataTree(name="rose") - john["mary/rose"] = rose + john = DataTree.from_dict({"/Mary/Rose": DataTree()}) + new_rose = DataTree(data=xr.Dataset({"x": 0})) + john["Mary/Rose"] = new_rose - grafted_rose = john["mary/rose"] - assert grafted_rose.parent is mary - assert grafted_rose.name == "rose" + grafted_rose = john["Mary/Rose"] + assert grafted_rose.parent is john["/Mary"] + assert grafted_rose.name == "Rose" def test_grafted_subtree_retains_name(self): subtree: DataTree = DataTree(name="original_subtree_name") From bc543b1e6f8291edb873fdf3d04a402cdec7dddd Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 20 Aug 2024 13:49:01 -0400 Subject: [PATCH 05/22] rewrite simple_datatree fixture to use DataTree.from_dict --- xarray/tests/conftest.py | 19 +++++++++++-------- xarray/tests/test_datatree.py | 24 ++++++++++++------------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/xarray/tests/conftest.py b/xarray/tests/conftest.py index a32b0e08bea..b8eeaf9f5d7 100644 --- a/xarray/tests/conftest.py +++ b/xarray/tests/conftest.py @@ -176,14 +176,17 @@ def _create_test_datatree(modify=lambda ds: ds): set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})) root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) - # Avoid using __init__ so we can independently test it - root: DataTree = DataTree(data=root_data) - set1: DataTree = DataTree(name="set1", parent=root, data=set1_data) - DataTree(name="set1", parent=set1) - DataTree(name="set2", parent=set1) - set2: DataTree = DataTree(name="set2", parent=root, data=set2_data) - DataTree(name="set1", parent=set2) - DataTree(name="set3", parent=root) + root = DataTree.from_dict( + { + "/": root_data, + "/set1": set1_data, + "/set1/set1": None, + "/set1/set2": None, + "/set2": set2_data, + "/set2/set1": None, + "/set3": None, + } + ) return root diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index bb671a4aaad..1c05ce17ad2 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -62,20 +62,20 @@ def test_create_two_children(self): DataTree(name="set2", parent=set1) def test_create_full_tree(self, simple_datatree): - root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) - set1_data = xr.Dataset({"a": 0, "b": 1}) - set2_data = xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])}) + d = simple_datatree.to_dict() + d_keys = list(d.keys()) - root: DataTree = DataTree(data=root_data) - set1: DataTree = DataTree(name="set1", parent=root, data=set1_data) - DataTree(name="set1", parent=set1) - DataTree(name="set2", parent=set1) - set2: DataTree = DataTree(name="set2", parent=root, data=set2_data) - DataTree(name="set1", parent=set2) - DataTree(name="set3", parent=root) + expected_keys = [ + "/", + "/set1", + "/set2", + "/set3", + "/set1/set1", + "/set1/set2", + "/set2/set1", + ] - expected = simple_datatree - assert root.identical(expected) + assert d_keys == expected_keys class TestNames: From a0806892a2ae785d164e6cbe9c5a0671b1557703 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 20 Aug 2024 13:53:04 -0400 Subject: [PATCH 06/22] fix incorrect creation of nested tree in formatting test --- xarray/tests/test_formatting.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 0bd8abc3a70..3e363e554af 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -657,8 +657,11 @@ def test_datatree_print_node_with_data(self): def test_datatree_printout_nested_node(self): dat = xr.Dataset({"a": [0, 2]}) - root: DataTree = DataTree(name="root") - DataTree(name="results", data=dat, parent=root) + root = DataTree.from_dict( + { + "/results": dat, + } + ) printout = str(root) assert printout.splitlines()[3].startswith(" ") From 2a6b88d942c2bdb5c6eaee173d1d45c123c7cdfd Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 14:38:56 -0600 Subject: [PATCH 07/22] Update doctests for from_dict constructor --- xarray/core/datatree_render.py | 42 +++++++++++++++++----------------- xarray/core/iterators.py | 15 +++++------- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py index 98cb4f91495..630c467d8cc 100644 --- a/xarray/core/datatree_render.py +++ b/xarray/core/datatree_render.py @@ -51,11 +51,13 @@ def __init__(self): >>> from xarray.core.datatree import DataTree >>> from xarray.core.datatree_render import RenderDataTree - >>> root = DataTree(name="root") - >>> s0 = DataTree(name="sub0", parent=root) - >>> s0b = DataTree(name="sub0B", parent=s0) - >>> s0a = DataTree(name="sub0A", parent=s0) - >>> s1 = DataTree(name="sub1", parent=root) + >>> root = DataTree.from_dict({ + ... "/": None, + ... "/sub0": None, + ... "/sub0/sub0B": None, + ... "/sub0/sub0A": None, + ... "/sub1" : None, + ... }, name="root") >>> print(RenderDataTree(root)) Group: / @@ -98,11 +100,13 @@ def __init__( >>> from xarray import Dataset >>> from xarray.core.datatree import DataTree >>> from xarray.core.datatree_render import RenderDataTree - >>> root = DataTree(name="root", data=Dataset({"a": 0, "b": 1})) - >>> s0 = DataTree(name="sub0", parent=root, data=Dataset({"c": 2, "d": 3})) - >>> s0b = DataTree(name="sub0B", parent=s0, data=Dataset({"e": 4})) - >>> s0a = DataTree(name="sub0A", parent=s0, data=Dataset({"f": 5, "g": 6})) - >>> s1 = DataTree(name="sub1", parent=root, data=Dataset({"h": 7})) + >>> root = DataTree.from_dict({ + ... "/": Dataset({"a": 0, "b": 1}), + ... "/sub0": Dataset({"c": 2, "d": 3}), + ... "/sub0/sub0B": Dataset({"e": 4}), + ... "/sub0/sub0A": Dataset({"f": 5, "g": 6}), + ... "/sub1": Dataset({"h": 7}) + ... },name="root") # Simple one line: @@ -208,17 +212,13 @@ def by_attr(self, attrname: str = "name") -> str: >>> from xarray import Dataset >>> from xarray.core.datatree import DataTree >>> from xarray.core.datatree_render import RenderDataTree - >>> root = DataTree(name="root") - >>> s0 = DataTree(name="sub0", parent=root) - >>> s0b = DataTree( - ... name="sub0B", parent=s0, data=Dataset({"foo": 4, "bar": 109}) - ... ) - >>> s0a = DataTree(name="sub0A", parent=s0) - >>> s1 = DataTree(name="sub1", parent=root) - >>> s1a = DataTree(name="sub1A", parent=s1) - >>> s1b = DataTree(name="sub1B", parent=s1, data=Dataset({"bar": 8})) - >>> s1c = DataTree(name="sub1C", parent=s1) - >>> s1ca = DataTree(name="sub1Ca", parent=s1c) + >>> root = DataTree.from_dict({ + ... "/sub0/sub0B": Dataset({"foo": 4, "bar": 109}), + ... "/sub0/sub0A": None, + ... "/sub1/sub1A" : None, + ... "/sub1/sub1B": Dataset({"bar": 8}), + ... "/sub1/sub1C/sub1Ca": None + ... }, name="root") >>> print(RenderDataTree(root).by_attr("name")) root ├── sub0 diff --git a/xarray/core/iterators.py b/xarray/core/iterators.py index 1ba3c6f1675..53d833ca22f 100644 --- a/xarray/core/iterators.py +++ b/xarray/core/iterators.py @@ -28,15 +28,12 @@ class LevelOrderIter(Iterator): -------- >>> from xarray.core.datatree import DataTree >>> from xarray.core.iterators import LevelOrderIter - >>> f = DataTree(name="f") - >>> b = DataTree(name="b", parent=f) - >>> a = DataTree(name="a", parent=b) - >>> d = DataTree(name="d", parent=b) - >>> c = DataTree(name="c", parent=d) - >>> e = DataTree(name="e", parent=d) - >>> g = DataTree(name="g", parent=f) - >>> i = DataTree(name="i", parent=g) - >>> h = DataTree(name="h", parent=i) + >>> f = DataTree.from_dict({ + ... "/b/a": None, + ... "/b/d/c": None, + ... "/b/d/e": None, + ... "/g/i/h": None + ... },name="f") >>> print(f) Group: / From 5ddcb444daeb0048330ee3e59e30f1df5608fdcf Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 14:39:26 -0600 Subject: [PATCH 08/22] swap i and h in doctest example for clarity. --- xarray/core/iterators.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/core/iterators.py b/xarray/core/iterators.py index 53d833ca22f..c76e2abc3b5 100644 --- a/xarray/core/iterators.py +++ b/xarray/core/iterators.py @@ -32,7 +32,7 @@ class LevelOrderIter(Iterator): ... "/b/a": None, ... "/b/d/c": None, ... "/b/d/e": None, - ... "/g/i/h": None + ... "/g/h/i": None ... },name="f") >>> print(f) @@ -43,19 +43,19 @@ class LevelOrderIter(Iterator): │ ├── Group: /b/d/c │ └── Group: /b/d/e └── Group: /g - └── Group: /g/i - └── Group: /g/i/h + └── Group: /g/h + └── Group: /g/h/i >>> [node.name for node in LevelOrderIter(f)] - ['f', 'b', 'g', 'a', 'd', 'i', 'c', 'e', 'h'] + ['f', 'b', 'g', 'a', 'd', 'h', 'c', 'e', 'i'] >>> [node.name for node in LevelOrderIter(f, maxlevel=3)] - ['f', 'b', 'g', 'a', 'd', 'i'] + ['f', 'b', 'g', 'a', 'd', 'h'] >>> [ ... node.name ... for node in LevelOrderIter(f, filter_=lambda n: n.name not in ("e", "g")) ... ] - ['f', 'b', 'a', 'd', 'i', 'c', 'h'] + ['f', 'b', 'a', 'd', 'h', 'c', 'i'] >>> [node.name for node in LevelOrderIter(f, stop=lambda n: n.name == "d")] - ['f', 'b', 'g', 'a', 'i', 'h'] + ['f', 'b', 'g', 'a', 'h', 'i'] """ def __init__( From 6c496b9533f6afda2ae73d1d665e4c9c40517b89 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 20:40:13 +0000 Subject: [PATCH 09/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/datatree_render.py | 51 ++++++++++++++++++++-------------- xarray/core/iterators.py | 9 ++---- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py index 630c467d8cc..47e0358588d 100644 --- a/xarray/core/datatree_render.py +++ b/xarray/core/datatree_render.py @@ -51,13 +51,16 @@ def __init__(self): >>> from xarray.core.datatree import DataTree >>> from xarray.core.datatree_render import RenderDataTree - >>> root = DataTree.from_dict({ - ... "/": None, - ... "/sub0": None, - ... "/sub0/sub0B": None, - ... "/sub0/sub0A": None, - ... "/sub1" : None, - ... }, name="root") + >>> root = DataTree.from_dict( + ... { + ... "/": None, + ... "/sub0": None, + ... "/sub0/sub0B": None, + ... "/sub0/sub0A": None, + ... "/sub1": None, + ... }, + ... name="root", + ... ) >>> print(RenderDataTree(root)) Group: / @@ -100,13 +103,16 @@ def __init__( >>> from xarray import Dataset >>> from xarray.core.datatree import DataTree >>> from xarray.core.datatree_render import RenderDataTree - >>> root = DataTree.from_dict({ - ... "/": Dataset({"a": 0, "b": 1}), - ... "/sub0": Dataset({"c": 2, "d": 3}), - ... "/sub0/sub0B": Dataset({"e": 4}), - ... "/sub0/sub0A": Dataset({"f": 5, "g": 6}), - ... "/sub1": Dataset({"h": 7}) - ... },name="root") + >>> root = DataTree.from_dict( + ... { + ... "/": Dataset({"a": 0, "b": 1}), + ... "/sub0": Dataset({"c": 2, "d": 3}), + ... "/sub0/sub0B": Dataset({"e": 4}), + ... "/sub0/sub0A": Dataset({"f": 5, "g": 6}), + ... "/sub1": Dataset({"h": 7}), + ... }, + ... name="root", + ... ) # Simple one line: @@ -212,13 +218,16 @@ def by_attr(self, attrname: str = "name") -> str: >>> from xarray import Dataset >>> from xarray.core.datatree import DataTree >>> from xarray.core.datatree_render import RenderDataTree - >>> root = DataTree.from_dict({ - ... "/sub0/sub0B": Dataset({"foo": 4, "bar": 109}), - ... "/sub0/sub0A": None, - ... "/sub1/sub1A" : None, - ... "/sub1/sub1B": Dataset({"bar": 8}), - ... "/sub1/sub1C/sub1Ca": None - ... }, name="root") + >>> root = DataTree.from_dict( + ... { + ... "/sub0/sub0B": Dataset({"foo": 4, "bar": 109}), + ... "/sub0/sub0A": None, + ... "/sub1/sub1A": None, + ... "/sub1/sub1B": Dataset({"bar": 8}), + ... "/sub1/sub1C/sub1Ca": None, + ... }, + ... name="root", + ... ) >>> print(RenderDataTree(root).by_attr("name")) root ├── sub0 diff --git a/xarray/core/iterators.py b/xarray/core/iterators.py index c76e2abc3b5..eeaeb35aa9c 100644 --- a/xarray/core/iterators.py +++ b/xarray/core/iterators.py @@ -28,12 +28,9 @@ class LevelOrderIter(Iterator): -------- >>> from xarray.core.datatree import DataTree >>> from xarray.core.iterators import LevelOrderIter - >>> f = DataTree.from_dict({ - ... "/b/a": None, - ... "/b/d/c": None, - ... "/b/d/e": None, - ... "/g/h/i": None - ... },name="f") + >>> f = DataTree.from_dict( + ... {"/b/a": None, "/b/d/c": None, "/b/d/e": None, "/g/h/i": None}, name="f" + ... ) >>> print(f) Group: / From 07d69040921f521967196c74c2fe53c6a340ad2d Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 15:30:55 -0600 Subject: [PATCH 10/22] Fix a few mypy errors. --- xarray/tests/test_datatree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 9cd524c144e..15e0fd6b7c0 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -37,13 +37,13 @@ def test_bad_names(self): class TestFamilyTree: def test_dont_modify_parent_inplace(self): # GH issue 9196 - root = DataTree() + root: DataTree = DataTree() DataTree(name="child", parent=root) assert root.children == {} def test_dont_modify_children_inplace(self): # GH issue 9196 - child = DataTree() + child: DataTree = DataTree() DataTree(children={"child": child}) assert child.parent is None @@ -422,7 +422,7 @@ def test_setitem_unnamed_child_node_becomes_named(self): def test_setitem_new_grandchild_node(self): john = DataTree.from_dict({"/Mary/Rose": DataTree()}) - new_rose = DataTree(data=xr.Dataset({"x": 0})) + new_rose: DataTree = DataTree(data=xr.Dataset({"x": 0})) john["Mary/Rose"] = new_rose grafted_rose = john["Mary/Rose"] From 015d2ce9710cdf39ade1633942f7a2b302ba67a3 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 15:50:18 -0600 Subject: [PATCH 11/22] Bonkers way to set type checking I will happily take something better. But this was the error I was getting xarray/tests/test_datatree.py:127: error: Argument 1 to "relative_to" of "NamedNode" has incompatible type "DataTree[Any] | DataArray"; expected "NamedNode[Any]" [arg-type] --- xarray/tests/test_datatree.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 15e0fd6b7c0..e1ddf8795d7 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -113,14 +113,19 @@ def test_same_tree(self): assert john["/Mary"].same_tree(john["/Kate"]) def test_relative_paths(self): - john = DataTree.from_dict( + john: DataTree = DataTree.from_dict( { "/Mary/Sue": DataTree(), "/Annie": DataTree(), } ) - sue = john["Mary/Sue"] - annie = john["Annie"] + sue_result = john["Mary/Sue"] + if isinstance(sue_result, DataTree): + sue: DataTree = sue_result + + annie_result = john["Annie"] + if isinstance(annie_result, DataTree): + annie : DataTree = annie_result assert sue.relative_to(john) == "Mary/Sue" assert john.relative_to(sue) == "../.." From 847f238b66515f75faa8b899a2c0aa225cb9d576 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 21:51:45 +0000 Subject: [PATCH 12/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index e1ddf8795d7..7e6218f14f0 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -125,7 +125,7 @@ def test_relative_paths(self): annie_result = john["Annie"] if isinstance(annie_result, DataTree): - annie : DataTree = annie_result + annie: DataTree = annie_result assert sue.relative_to(john) == "Mary/Sue" assert john.relative_to(sue) == "../.." From 7c530f5a3c09c7331b30db6bb7e5a790c754e192 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 18:25:11 -0600 Subject: [PATCH 13/22] Removes parent keyword from DataTree constructor But it doesn't fix all the tests There's three tests that I don't fully know what should be tested or if they still make sense. --- xarray/core/datatree.py | 6 +-- xarray/tests/test_datatree.py | 99 +++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index a9085d410b6..92089532502 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -424,7 +424,6 @@ class DataTree( def __init__( self, data: Dataset | DataArray | None = None, - parent: DataTree | None = None, children: Mapping[str, DataTree] | None = None, name: str | None = None, ): @@ -440,8 +439,6 @@ def __init__( data : Dataset, DataArray, or None, optional Data to store under the .ds attribute of this node. DataArrays will be promoted to Datasets. Default is None. - parent : DataTree, optional - Parent node to this node. Default is None. children : Mapping[str, DataTree], optional Any child nodes of this node. Default is None. name : str, optional @@ -462,7 +459,6 @@ def __init__( self._set_node_data(_coerce_to_dataset(data)) # shallow copy to avoid modifying arguments in-place (see GH issue #9196) - self.parent = parent.copy() if parent is not None else None self.children = {name: child.copy() for name, child in children.items()} def _set_node_data(self, ds: Dataset): @@ -1100,7 +1096,7 @@ def from_dict( obj = root_data.copy() obj.orphan() else: - obj = cls(name=name, data=root_data, parent=None, children=None) + obj = cls(name=name, data=root_data, children=None) def depth(item) -> int: pathstr, _ = item diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 7e6218f14f0..9d4342ff523 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -55,7 +55,9 @@ def test_setparent_unnamed_child_node_fails(self): def test_create_two_children(self): root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) - + # root = DataTree.from_dict( + # {"/": root_data, "/set1": set1_data, "/set1/set2": None} + # ) root: DataTree = DataTree(data=root_data) set1: DataTree = DataTree(name="set1", parent=root, data=set1_data) DataTree(name="set1", parent=root) @@ -195,9 +197,13 @@ def test_to_dataset(self): class TestVariablesChildrenNameCollisions: def test_parent_already_has_variable_with_childs_name(self): - dt: DataTree = DataTree(data=xr.Dataset({"a": [0], "b": 1})) with pytest.raises(KeyError, match="already contains a variable named a"): - DataTree(name="a", data=None, parent=dt) + DataTree.from_dict({"/": xr.Dataset({"a": [0], "b": 1}), "/a": None}) + + def test_parent_already_has_variable_with_childs_name_update(self): + dt: DataTree = DataTree(data=xr.Dataset({"a": [0], "b": 1})) + with pytest.raises(ValueError, match="already contains a variable named a"): + dt.update({"a": DataTree()}) def test_assign_when_already_child_with_variables_name(self): dt = DataTree.from_dict( @@ -249,8 +255,7 @@ def test_getitem_single_data_variable_from_node(self): assert_identical(folder1["results/highres/temp"], data["temp"]) def test_getitem_nonexistent_node(self): - folder1: DataTree = DataTree(name="folder1") - DataTree(name="results", parent=folder1) + folder1: DataTree = DataTree.from_dict({"/results": DataTree()}, name="folder1") with pytest.raises(KeyError): folder1["results/highres"] @@ -448,10 +453,10 @@ def test_setitem_new_empty_node(self): assert_identical(mary.to_dataset(), xr.Dataset()) def test_setitem_overwrite_data_in_node_with_none(self): - john: DataTree = DataTree(name="john") - mary: DataTree = DataTree(name="mary", parent=john, data=xr.Dataset()) + john: DataTree = DataTree.from_dict({"/mary": xr.Dataset()}, name="john") + john["mary"] = DataTree() - assert_identical(mary.to_dataset(), xr.Dataset()) + assert_identical(john["mary"].to_dataset(), xr.Dataset()) john.ds = xr.Dataset() # type: ignore[assignment] with pytest.raises(ValueError, match="has no name"): @@ -633,8 +638,13 @@ def test_view_contents(self): def test_immutability(self): # See issue https://github.com/xarray-contrib/datatree/issues/38 - dt: DataTree = DataTree(name="root", data=None) - DataTree(name="a", data=None, parent=dt) + dt = DataTree.from_dict( + { + "/": None, + "/a": None, + }, + name="root", + ) with pytest.raises( AttributeError, match="Mutation of the DatasetView is not allowed" @@ -1087,44 +1097,51 @@ def test_filter(self): class TestDSMethodInheritance: def test_dataset_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) - dt: DataTree = DataTree(data=ds) - DataTree(name="results", parent=dt, data=ds) + dt = DataTree.from_dict( + { + "/": ds, + "/results": ds, + } + ) - expected: DataTree = DataTree(data=ds.isel(x=1)) - DataTree(name="results", parent=expected, data=ds.isel(x=1)) + expected = DataTree.from_dict( + { + "/": ds.isel(x=1), + "/results": ds.isel(x=1), + } + ) result = dt.isel(x=1) assert_equal(result, expected) def test_reduce_method(self): ds = xr.Dataset({"a": ("x", [False, True, False])}) - dt: DataTree = DataTree(data=ds) - DataTree(name="results", parent=dt, data=ds) + dt = DataTree.from_dict({"/": ds, "/results": ds}) - expected: DataTree = DataTree(data=ds.any()) - DataTree(name="results", parent=expected, data=ds.any()) + expected = DataTree.from_dict({"/": ds.any(), "/results": ds.any()}) result = dt.any() assert_equal(result, expected) def test_nan_reduce_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) - dt: DataTree = DataTree(data=ds) - DataTree(name="results", parent=dt, data=ds) + dt = DataTree.from_dict({"/": ds, "/results": ds}) - expected: DataTree = DataTree(data=ds.mean()) - DataTree(name="results", parent=expected, data=ds.mean()) + expected = DataTree.from_dict({"/": ds.mean(), "/results": ds.mean()}) result = dt.mean() assert_equal(result, expected) def test_cum_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) - dt: DataTree = DataTree(data=ds) - DataTree(name="results", parent=dt, data=ds) + dt = DataTree.from_dict({"/": ds, "/results": ds}) - expected: DataTree = DataTree(data=ds.cumsum()) - DataTree(name="results", parent=expected, data=ds.cumsum()) + expected = DataTree.from_dict( + { + "/": ds.cumsum(), + "/results": ds.cumsum(), + } + ) result = dt.cumsum() assert_equal(result, expected) @@ -1134,11 +1151,9 @@ class TestOps: def test_binary_op_on_int(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) - dt: DataTree = DataTree(data=ds1) - DataTree(name="subnode", data=ds2, parent=dt) + dt = DataTree.from_dict({"/": ds1, "/subnode": ds2}) - expected: DataTree = DataTree(data=ds1 * 5) - DataTree(name="subnode", data=ds2 * 5, parent=expected) + expected = DataTree.from_dict({"/": ds1 * 5, "/subnode": ds2 * 5}) # TODO: Remove ignore when ops.py is migrated? result: DataTree = dt * 5 # type: ignore[assignment,operator] @@ -1147,12 +1162,21 @@ def test_binary_op_on_int(self): def test_binary_op_on_dataset(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) - dt: DataTree = DataTree(data=ds1) - DataTree(name="subnode", data=ds2, parent=dt) + dt = DataTree.from_dict( + { + "/": ds1, + "/subnode": ds2, + } + ) + other_ds = xr.Dataset({"z": ("z", [0.1, 0.2])}) - expected: DataTree = DataTree(data=ds1 * other_ds) - DataTree(name="subnode", data=ds2 * other_ds, parent=expected) + expected = DataTree.from_dict( + { + "/": ds1 * other_ds, + "/subnode": ds2 * other_ds, + } + ) result = dt * other_ds assert_equal(result, expected) @@ -1160,11 +1184,10 @@ def test_binary_op_on_dataset(self): def test_binary_op_on_datatree(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) - dt: DataTree = DataTree(data=ds1) - DataTree(name="subnode", data=ds2, parent=dt) - expected: DataTree = DataTree(data=ds1 * ds1) - DataTree(name="subnode", data=ds2 * ds2, parent=expected) + dt = DataTree.from_dict({"/": ds1, "/subnode": ds2}) + + expected = DataTree.from_dict({"/": ds1 * ds1, "/subnode": ds2 * ds2}) # TODO: Remove ignore when ops.py is migrated? result: DataTree = dt * dt # type: ignore[operator] From 9a004d4cf7a9aae7575f2ae0592d923c022f18b5 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 20:04:50 -0600 Subject: [PATCH 14/22] fix test_setparent_unnamed_child_node_fails --- xarray/tests/test_datatree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 9d4342ff523..1a83c73e59c 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -50,7 +50,8 @@ def test_dont_modify_children_inplace(self): def test_setparent_unnamed_child_node_fails(self): john: DataTree = DataTree(name="john") with pytest.raises(ValueError, match="unnamed"): - DataTree(parent=john) + child = DataTree(name=None) + child.parent = john def test_create_two_children(self): root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) From fe5ae9c6d5b5e824fe9874efa052c3adcd88f5e0 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 2 Sep 2024 20:21:15 -0600 Subject: [PATCH 15/22] fix test_dont_modify_parent_inplace -> bug? --- xarray/tests/test_datatree.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 1a83c73e59c..f1310a449b1 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -37,8 +37,9 @@ def test_bad_names(self): class TestFamilyTree: def test_dont_modify_parent_inplace(self): # GH issue 9196 - root: DataTree = DataTree() - DataTree(name="child", parent=root) + root: DataTree = DataTree(name="root") + child: DataTree = DataTree(name="child") + child.parent = root assert root.children == {} def test_dont_modify_children_inplace(self): From 7ce6b56a303922323781f2e1def2a1a09a1b2d7b Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 10:24:07 -0400 Subject: [PATCH 16/22] fix test_create_two_children --- xarray/tests/test_datatree.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index f1310a449b1..184a773098d 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -57,13 +57,11 @@ def test_setparent_unnamed_child_node_fails(self): def test_create_two_children(self): root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) - # root = DataTree.from_dict( - # {"/": root_data, "/set1": set1_data, "/set1/set2": None} - # ) - root: DataTree = DataTree(data=root_data) - set1: DataTree = DataTree(name="set1", parent=root, data=set1_data) - DataTree(name="set1", parent=root) - DataTree(name="set2", parent=set1) + root = DataTree.from_dict( + {"/": root_data, "/set1": set1_data, "/set1/set2": None} + ) + assert root["/set1"].name == "set1" + assert root["/set1/set2"].name == "set2" def test_create_full_tree(self, simple_datatree): d = simple_datatree.to_dict() From 897b589239311e590a6d12fb287120c904c7f7a2 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 10:49:17 -0400 Subject: [PATCH 17/22] make .parent read-only, and remove tests which test the parent setter --- xarray/core/datatree.py | 8 +------- xarray/core/treenode.py | 6 ++++++ xarray/tests/test_datatree.py | 13 ------------- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 382f67b1f58..5d848c6bcfd 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -500,12 +500,6 @@ def parent(self: DataTree) -> DataTree | None: """Parent of this node.""" return self._parent - @parent.setter - def parent(self: DataTree, new_parent: DataTree) -> None: - if new_parent and self.name is None: - raise ValueError("Cannot set an unnamed node as a child of another node") - self._set_parent(new_parent, self.name) - def _to_dataset_view(self, rebuild_dims: bool) -> DatasetView: variables = dict(self._data_variables) variables |= self._coord_variables @@ -894,7 +888,7 @@ def _set(self, key: str, val: DataTree | CoercibleValue) -> None: # create and assign a shallow copy here so as not to alter original name of node in grafted tree new_node = val.copy(deep=False) new_node.name = key - new_node.parent = self + new_node._set_parent(new_parent=self, child_name=key) else: if not isinstance(val, DataArray | Variable): # accommodate other types that can be coerced into Variables diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 77e7ed23a51..23ebc10d96a 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -86,6 +86,12 @@ def parent(self) -> Tree | None: """Parent of this node.""" return self._parent + @parent.setter + def parent(self: Tree, new_parent: Tree) -> None: + raise AttributeError( + "Cannot set parent attribute directly, you must modify the children attribute of the other node instead" + ) + def _set_parent( self, new_parent: Tree | None, child_name: str | None = None ) -> None: diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 184a773098d..ee1f470f8dd 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -35,25 +35,12 @@ def test_bad_names(self): class TestFamilyTree: - def test_dont_modify_parent_inplace(self): - # GH issue 9196 - root: DataTree = DataTree(name="root") - child: DataTree = DataTree(name="child") - child.parent = root - assert root.children == {} - def test_dont_modify_children_inplace(self): # GH issue 9196 child: DataTree = DataTree() DataTree(children={"child": child}) assert child.parent is None - def test_setparent_unnamed_child_node_fails(self): - john: DataTree = DataTree(name="john") - with pytest.raises(ValueError, match="unnamed"): - child = DataTree(name=None) - child.parent = john - def test_create_two_children(self): root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) From 9938f3b0c04e288893949000ed2d9aa7f3c5a3dd Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 11:01:16 -0400 Subject: [PATCH 18/22] update error message to reflect fact that .children is Frozen --- xarray/core/treenode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 23ebc10d96a..9dfd346508a 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -89,7 +89,7 @@ def parent(self) -> Tree | None: @parent.setter def parent(self: Tree, new_parent: Tree) -> None: raise AttributeError( - "Cannot set parent attribute directly, you must modify the children attribute of the other node instead" + "Cannot set parent attribute directly, you must modify the children of the other node instead using dict-like syntax" ) def _set_parent( From cb7fdfafe3dc043196fa0ea2fa19d4dcbcd7f839 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 11:03:02 -0400 Subject: [PATCH 19/22] fix another test --- xarray/tests/test_datatree_mapping.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_datatree_mapping.py b/xarray/tests/test_datatree_mapping.py index b8b55613c4a..62fada4ab4f 100644 --- a/xarray/tests/test_datatree_mapping.py +++ b/xarray/tests/test_datatree_mapping.py @@ -74,10 +74,9 @@ def test_checking_from_root(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() real_root: DataTree = DataTree(name="real root") - dt2.name = "not_real_root" - dt2.parent = real_root + real_root["not_real_root"] = dt2 with pytest.raises(TreeIsomorphismError): - check_isomorphic(dt1, dt2, check_from_root=True) + check_isomorphic(dt1, real_root, check_from_root=True) class TestMapOverSubTree: From 1f651cb4040cf70c3cb7ecc11e02660f3079ce72 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 11:11:59 -0400 Subject: [PATCH 20/22] add test that parent setter tells you to set children instead --- xarray/tests/test_treenode.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index a7de2e39af6..3996edba659 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -55,6 +55,15 @@ def test_parent_swap(self): assert steve.children["Mary"] is mary assert "Mary" not in john.children + def test_forbid_setting_parent_directly(self): + john: TreeNode = TreeNode() + mary: TreeNode = TreeNode() + + with pytest.raises( + AttributeError, match="Cannot set parent attribute directly" + ): + mary.parent = john + def test_multi_child_family(self): mary: TreeNode = TreeNode() kate: TreeNode = TreeNode() From 80b4bcdd63ad49c3d6af75f8248f436e7138dff5 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 11:25:53 -0400 Subject: [PATCH 21/22] fix mypy error due to overriding settable property with read-only property --- xarray/core/datatree.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 5d848c6bcfd..3a3fb19daa4 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -495,11 +495,6 @@ def _dims(self) -> ChainMap[Hashable, int]: def _indexes(self) -> ChainMap[Hashable, Index]: return ChainMap(self._node_indexes, *(p._node_indexes for p in self.parents)) - @property - def parent(self: DataTree) -> DataTree | None: - """Parent of this node.""" - return self._parent - def _to_dataset_view(self, rebuild_dims: bool) -> DatasetView: variables = dict(self._data_variables) variables |= self._coord_variables From f4c7243f9cdb7d7514b3e1acea258251d3c4aadf Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 7 Sep 2024 11:26:26 -0400 Subject: [PATCH 22/22] fix test by not trying to set parent via kwarg --- xarray/tests/test_datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index ee1f470f8dd..1a840dbb9e4 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -137,7 +137,7 @@ def test_create_with_data(self): assert_identical(john.to_dataset(), dat) with pytest.raises(TypeError): - DataTree(name="mary", parent=john, data="junk") # type: ignore[arg-type] + DataTree(name="mary", data="junk") # type: ignore[arg-type] def test_set_data(self): john: DataTree = DataTree(name="john")