From 4f2dd702e89e032ce21ae28ab8b7e3f81ce111ed Mon Sep 17 00:00:00 2001
From: Brian Zhang
Date: Wed, 7 Apr 2021 17:36:29 -0400
Subject: [PATCH 1/3] Compress top-level metadata

---
 tests/test_compression.py | 10 ++++++++++
 tszip/compression.py      |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/tests/test_compression.py b/tests/test_compression.py
index 2fb50c5..cbbad4a 100644
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@@ -113,6 +113,16 @@ def test_small_msprime_migration(self):
         self.assertGreater(ts.num_trees, 2)
         self.verify(ts)
 
+    def test_small_msprime_top_level_metadata(self):
+        ts = msprime.simulate(10, recombination_rate=2, mutation_rate=2, random_seed=2)
+        self.assertGreater(ts.num_sites, 2)
+        self.assertGreater(ts.num_trees, 2)
+        tables = ts.dump_tables()
+        top_level_schema = {'codec': 'json', 'properties': {'my_int': {'type': 'integer'}}}
+        tables.metadata_schema = tskit.MetadataSchema(top_level_schema)
+        tables.metadata = {"my_int": 1234}
+        self.verify(tables.tree_sequence())
+
     def test_small_msprime_individuals_metadata(self):
         ts = msprime.simulate(10, recombination_rate=1, mutation_rate=2, random_seed=2)
         self.assertGreater(ts.num_sites, 2)
diff --git a/tszip/compression.py b/tszip/compression.py
index 759264b..6478792 100644
--- a/tszip/compression.py
+++ b/tszip/compression.py
@@ -172,6 +172,9 @@ def compress_zarr(ts, root, variants_only=False):
     root.attrs["format_version"] = FORMAT_VERSION
     root.attrs["sequence_length"] = tables.sequence_length
     root.attrs["provenance"] = provenance_dict
+    if tables.metadata_schema.schema is not None:
+        root.attrs["metadata_schema"] = tables.metadata_schema.schema
+        root.attrs["metadata"] = tables.metadata
 
     columns = [
         Column("coordinates", coordinates),
@@ -280,6 +283,10 @@ def load_zarr(path):
 def decompress_zarr(root):
     tables = tskit.TableCollection(root.attrs["sequence_length"])
     coordinates = root["coordinates"][:]
+    if "metadata_schema" in root.attrs:
+        tables.metadata_schema = tskit.MetadataSchema(root.attrs["metadata_schema"])
+    if "metadata" in root.attrs:
+        tables.metadata = root.attrs["metadata"]
 
     tables.individuals.set_columns(
         flags=root["individuals/flags"],

From ab27282616edfc8e4f20bc8c42ef008255ad42aa Mon Sep 17 00:00:00 2001
From: Brian Zhang
Date: Wed, 7 Apr 2021 17:46:06 -0400
Subject: [PATCH 2/3] Minor change to decompression

---
 tszip/compression.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tszip/compression.py b/tszip/compression.py
index 6478792..41d430a 100644
--- a/tszip/compression.py
+++ b/tszip/compression.py
@@ -283,9 +283,8 @@ def load_zarr(path):
 def decompress_zarr(root):
     tables = tskit.TableCollection(root.attrs["sequence_length"])
     coordinates = root["coordinates"][:]
-    if "metadata_schema" in root.attrs:
+    if "metadata_schema" in root.attrs and "metadata" in root.attrs:
         tables.metadata_schema = tskit.MetadataSchema(root.attrs["metadata_schema"])
-    if "metadata" in root.attrs:
         tables.metadata = root.attrs["metadata"]
 
     tables.individuals.set_columns(

From 45ec7c14e4d8790b2937bbd000ba4d1d73afafcf Mon Sep 17 00:00:00 2001
From: Brian Zhang
Date: Wed, 7 Apr 2021 17:48:27 -0400
Subject: [PATCH 3/3] Adhere to 90-char line limit

---
 tests/test_compression.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_compression.py b/tests/test_compression.py
index cbbad4a..709f2cf 100644
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@@ -118,7 +118,10 @@ def test_small_msprime_top_level_metadata(self):
         self.assertGreater(ts.num_sites, 2)
         self.assertGreater(ts.num_trees, 2)
         tables = ts.dump_tables()
-        top_level_schema = {'codec': 'json', 'properties': {'my_int': {'type': 'integer'}}}
+        top_level_schema = {
+            'codec': 'json',
+            'properties': {'my_int': {'type': 'integer'}}
+        }
         tables.metadata_schema = tskit.MetadataSchema(top_level_schema)
         tables.metadata = {"my_int": 1234}
         self.verify(tables.tree_sequence())