Skip to content

Commit 06361aa

Browse files
committed
add vectorised metadata, closes #1676
1 parent 9cd4473 commit 06361aa

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

python/tests/test_tables.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,31 @@ def test_set_columns_metadata_schema(self):
11121112
table.set_columns(**table2.asdict())
11131113
assert table.metadata_schema == ms
11141114

1115+
def test_metadata_vector(self):
    """
    Check that ``metadata_vector`` extracts both top-level and nested keys
    from JSON metadata, substituting the default value where a key is absent.
    """
    table = self.table_class()
    ms = tskit.MetadataSchema({"codec": "json"})
    table.metadata_schema = ms
    metadata_list = [
        {"a": 4},
        {"a": 10},
        {"a": -3, "b": {"c": 1}},
        {"b": {"c": 3.2}},
        {},
    ]
    for md in metadata_list:
        table.add_row(
            **{
                **self.input_data_for_add_row(),
                "metadata": md,
            }
        )
    default_value = -1

    def expected_value(md, key):
        # Reference lookup written independently of the implementation:
        # dict.get accepts neither a ``default=`` keyword nor a list key,
        # so nested paths are walked one component at a time.
        path = key if isinstance(key, list) else [key]
        value = md
        for name in path:
            if name not in value:
                return default_value
            value = value[name]
        return value

    for key in ["a", ["b", "c"]]:
        # np.equal returns an element-wise array whose truth value is
        # ambiguous; array_equal collapses the comparison to a single bool
        # and also checks that the shapes agree.
        assert np.array_equal(
            [expected_value(md, key) for md in metadata_list],
            table.metadata_vector(key, default_value=default_value),
        )
1139+
11151140

11161141
class AssertEqualsMixin:
11171142
def test_equal(self, table_5row, test_rows):

python/tskit/tables.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,25 @@ def _update_metadata_schema_cache_from_ll(self) -> None:
585585
self.ll_table.metadata_schema
586586
)
587587

588+
def metadata_vector(self, key, *, dtype=None, default_value=None):
    """
    Returns a numpy array of metadata values obtained by extracting ``key``
    from each metadata entry, and inserting ``default_value`` if the key is
    not present. ``key`` may be a list, in which case nested values are returned.
    For instance, ``key = ["a", "x"]`` will return an array of
    ``row.metadata["a"]["x"]`` values, iterated over rows in this table.

    :param key: The name of a metadata entry, or a list (or tuple) of names
        giving the path to a nested entry.
    :param dtype dtype: The dtype of the result (can usually be omitted,
        in which case numpy infers it from the extracted values).
    :param object default_value: The value to be inserted if the metadata key
        is not present, or if an intermediate value on a nested path is not
        a mapping. Note that for numeric columns, a default value of None
        will result in a non-numeric array.
    :return: An array with one entry per row of this table.
    :rtype: numpy.ndarray
    """
    from collections.abc import Mapping

    # Normalise to a path so single and nested keys share one code path.
    path = key if isinstance(key, (list, tuple)) else [key]

    def extract(metadata):
        value = metadata
        for name in path:
            # dict.get takes its default positionally only and cannot
            # follow a nested path, so each level is walked explicitly.
            if not isinstance(value, Mapping) or name not in value:
                return default_value
            value = value[name]
        return value

    # np.array infers the dtype when none is given (e.g. an object array
    # when None defaults are mixed with numbers); np.fromiter cannot do so.
    return np.array([extract(row.metadata) for row in self], dtype=dtype)
606+
588607

589608
class IndividualTable(BaseTable, MetadataMixin):
590609
"""

0 commit comments

Comments
 (0)