Skip to content

Commit 1fbb102

Browse files
committed
Don't sort individuals by default
1 parent 6142f7f commit 1fbb102

File tree

12 files changed

+101
-54
lines changed

12 files changed

+101
-54
lines changed

c/tests/test_tables.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6280,10 +6280,10 @@ test_sort_tables_offsets(void)
62806280
CU_ASSERT_EQUAL_FATAL(ret, 0);
62816281
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
62826282

6283-
/* Check that sorting would have had an effect */
6283+
/* Check that sorting would have had no effect as individuals not in default sort*/
62846284
ret = tsk_table_collection_sort(&tables, NULL, 0);
62856285
CU_ASSERT_EQUAL_FATAL(ret, 0);
6286-
CU_ASSERT_FALSE(tsk_table_collection_equals(&tables, &copy, 0));
6286+
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
62876287

62886288
tsk_memset(&bookmark, 0, sizeof(bookmark));
62896289
bookmark.individuals = tables.individuals.num_rows - 1;
@@ -6807,12 +6807,21 @@ test_sort_tables_individuals(void)
68076807
CU_ASSERT_EQUAL_FATAL(ret, 0);
68086808
tables.sequence_length = 1.0;
68096809
parse_individuals(individuals, &tables.individuals);
6810+
6811+
ret = tsk_table_collection_copy(&tables, &copy, 0);
6812+
CU_ASSERT_EQUAL_FATAL(ret, 0);
6813+
6814+
/* Table sort doesn't touch individuals by default*/
6815+
ret = tsk_table_collection_sort(&tables, NULL, 0);
6816+
CU_ASSERT_EQUAL_FATAL(ret, 0);
6817+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));
6818+
68106819
/* Not calling with TSK_CHECK_TREES so casting is safe */
68116820
ret = (int) tsk_table_collection_check_integrity(
68126821
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
68136822
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
68146823

6815-
ret = tsk_table_collection_sort(&tables, NULL, 0);
6824+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
68166825
CU_ASSERT_EQUAL_FATAL(ret, 0);
68176826
ret = (int) tsk_table_collection_check_integrity(
68186827
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
@@ -6821,16 +6830,14 @@ test_sort_tables_individuals(void)
68216830
/* Check that the sort is stable */
68226831
ret = tsk_table_collection_copy(&tables, &copy, 0);
68236832
CU_ASSERT_EQUAL_FATAL(ret, 0);
6824-
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));
6825-
6826-
ret = tsk_table_collection_sort(&tables, NULL, 0);
6833+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
68276834
CU_ASSERT_EQUAL_FATAL(ret, 0);
6828-
CU_ASSERT(tsk_table_collection_equals(&tables, &copy, 0));
6835+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));
68296836

68306837
/* Errors on cycle */
68316838
tsk_individual_table_clear(&tables.individuals);
68326839
parse_individuals(individuals_cycle, &tables.individuals);
6833-
ret = tsk_table_collection_sort(&tables, NULL, 0);
6840+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
68346841
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);
68356842

68366843
tsk_table_collection_free(&tables);

c/tests/test_trees.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2077,10 +2077,10 @@ test_simplest_bad_individuals(void)
20772077
tsk_treeseq_free(&ts);
20782078
tables.individuals.parents[0] = TSK_NULL;
20792079

2080-
/* Unsorted individuals */
2080+
/* Unsorted individuals are OK*/
20812081
tables.individuals.parents[0] = 1;
20822082
ret = tsk_treeseq_init(&ts, &tables, load_flags);
2083-
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
2083+
CU_ASSERT_EQUAL(ret, 0);
20842084
tsk_treeseq_free(&ts);
20852085
tables.individuals.parents[0] = TSK_NULL;
20862086

c/tskit/tables.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6143,15 +6143,16 @@ tsk_individual_table_topological_sort(
61436143
return ret;
61446144
}
61456145

6146-
static int
6147-
tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
6146+
int
6147+
tsk_table_collection_individual_topological_sort(
6148+
tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
61486149
{
61496150
int ret = 0;
61506151
tsk_id_t i, ret_id;
61516152
tsk_individual_table_t copy;
61526153
tsk_individual_t individual;
6153-
tsk_individual_table_t *individuals = &self->tables->individuals;
6154-
tsk_node_table_t *nodes = &self->tables->nodes;
6154+
tsk_individual_table_t *individuals = &self->individuals;
6155+
tsk_node_table_t *nodes = &self->nodes;
61556156
tsk_size_t num_individuals = individuals->num_rows;
61566157
tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));
61576158
tsk_id_t *new_id_map = tsk_malloc(num_individuals * sizeof(*new_id_map));
@@ -6379,7 +6380,7 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
63796380
goto out;
63806381
}
63816382
}
6382-
if (!skip_individuals) {
6383+
if (!skip_individuals && self->sort_individuals) {
63836384
ret = self->sort_individuals(self);
63846385
if (ret != 0) {
63856386
goto out;
@@ -6415,7 +6416,8 @@ tsk_table_sorter_init(
64156416
/* Set the sort_edges and sort_mutations methods to the default. */
64166417
self->sort_edges = tsk_table_sorter_sort_edges;
64176418
self->sort_mutations = tsk_table_sorter_sort_mutations;
6418-
self->sort_individuals = tsk_table_sorter_sort_individuals;
6419+
/* Default sort doesn't touch individuals */
6420+
self->sort_individuals = NULL;
64196421
out:
64206422
return ret;
64216423
}
@@ -9722,11 +9724,10 @@ tsk_table_collection_check_integrity(
97229724
tsk_id_t ret = 0;
97239725

97249726
if (options & TSK_CHECK_TREES) {
9725-
/* Checking the trees implies all the other checks */
9727+
/* Checking the trees implies these checks */
97269728
options |= TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING
97279729
| TSK_CHECK_SITE_DUPLICATES | TSK_CHECK_MUTATION_ORDERING
9728-
| TSK_CHECK_INDIVIDUAL_ORDERING | TSK_CHECK_MIGRATION_ORDERING
9729-
| TSK_CHECK_INDEXES;
9730+
| TSK_CHECK_MIGRATION_ORDERING | TSK_CHECK_INDEXES;
97309731
}
97319732

97329733
if (self->sequence_length <= 0) {

c/tskit/tables.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3672,17 +3672,33 @@ TSK_NO_CHECK_INTEGRITY
36723672
36733673
@endrst
36743674
3675-
@param self A pointer to a tsk_individual_table_t object.
3675+
@param self A pointer to a tsk_table_collection_t object.
36763676
@param start The position to begin sorting in each table; all rows less than this
36773677
position must fulfill the tree sequence sortedness requirements. If this is
36783678
NULL, sort all rows.
3679-
@param options Sort options. Currently unused; should be
3680-
set to zero to ensure compatibility with later versions of tskit.
3679+
@param options Sort options.
36813680
@return Return 0 on success or a negative value on failure.
36823681
*/
36833682
int tsk_table_collection_sort(
36843683
tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options);
36853684

3685+
/**
3686+
@brief Sorts the indivdual table in this collection.
3687+
3688+
@rst
3689+
Sorts the individual table in place, so that parents come before children,
3690+
and the parent column is remapped as required. Node references to individuals
3691+
are also updated.
3692+
@endrst
3693+
3694+
@param self A pointer to a tsk_table_collection_t object.
3695+
@param options Sort options. Currently unused; should be
3696+
set to zero to ensure compatibility with later versions of tskit.
3697+
@return Return 0 on success or a negative value on failure.
3698+
*/
3699+
int tsk_table_collection_individual_topological_sort(
3700+
tsk_table_collection_t *self, tsk_flags_t options);
3701+
36863702
/**
36873703
@brief Puts the tables into canonical form.
36883704

docs/data-model.rst

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -566,13 +566,8 @@ Individual requirements
566566
-----------------------
567567

568568
Individuals are a basic type in a tree sequence and are not defined with
569-
respect to any other tables. Individuals can have a reference to their parent
570-
individuals, if present these references must be valid or null (-1).
571-
572-
Individuals must be sorted such that parents are before children.
573-
Sorting a set of tables using :meth:`TableCollection.sort` will ensure that
574-
this requirement is fulfilled.
575-
569+
respect to any other tables. Individuals can have a reference to any number of
570+
their parent individuals, if present these references must be valid or null (-1).
576571

577572
.. _sec_node_requirements:
578573

python/_tskitmodule.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6632,6 +6632,26 @@ TableCollection_sort(TableCollection *self, PyObject *args, PyObject *kwds)
66326632
return ret;
66336633
}
66346634

6635+
static PyObject *
6636+
TableCollection_sort_individuals(TableCollection *self, PyObject *args, PyObject *kwds)
6637+
{
6638+
int err;
6639+
PyObject *ret = NULL;
6640+
6641+
if (TableCollection_check_state(self) != 0) {
6642+
goto out;
6643+
}
6644+
6645+
err = tsk_table_collection_individual_topological_sort(self->tables, 0);
6646+
if (err != 0) {
6647+
handle_library_error(err);
6648+
goto out;
6649+
}
6650+
ret = Py_BuildValue("");
6651+
out:
6652+
return ret;
6653+
}
6654+
66356655
static PyObject *
66366656
TableCollection_canonicalise(TableCollection *self, PyObject *args, PyObject *kwds)
66376657
{
@@ -7115,6 +7135,10 @@ static PyMethodDef TableCollection_methods[] = {
71157135
.ml_meth = (PyCFunction) TableCollection_sort,
71167136
.ml_flags = METH_VARARGS | METH_KEYWORDS,
71177137
.ml_doc = "Sorts the tables to satisfy tree sequence requirements." },
7138+
{ .ml_name = "sort_individuals",
7139+
.ml_meth = (PyCFunction) TableCollection_sort_individuals,
7140+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
7141+
.ml_doc = "Sorts the individual table topologically" },
71187142
{ .ml_name = "canonicalise",
71197143
.ml_meth = (PyCFunction) TableCollection_canonicalise,
71207144
.ml_flags = METH_VARARGS | METH_KEYWORDS,

python/tests/data/svg/ts_multiroot.svg

Lines changed: 10 additions & 10 deletions
Loading

python/tests/test_lowlevel.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,10 @@ def test_fromdict_bad_args(self):
400400
with pytest.raises(TypeError):
401401
tc.fromdict(bad_type)
402402

403+
def test_sort_individuals(self):
404+
tc = _tskit.TableCollection(0)
405+
tc.sort_individuals()
406+
403407

404408
class TestIbd:
405409
def test_uninitialised(self):

python/tests/test_tables.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3204,12 +3204,10 @@ def test_shuffled_individual_parent_mapping(self, wf_sim_with_individual_metadat
32043204
shuffle_migrations=False,
32053205
)
32063206
# Check we have a mixed up order
3207-
with pytest.raises(
3208-
tskit.LibraryError,
3209-
match="Individuals must be provided in an order where"
3210-
" children are after their parent individuals",
3211-
):
3212-
tables.tree_sequence()
3207+
tables2 = tables.copy()
3208+
tables2.sort_individuals()
3209+
with pytest.raises(AssertionError, match="IndividualTable row 0 differs"):
3210+
tables.assert_equals(tables2)
32133211

32143212
tables.simplify()
32153213
metadata = [

python/tests/test_text_formats.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def test_single_family_map_parent_ids(self):
156156
FamEntry(iid="3", pat="1", mat="2"),
157157
]
158158
tb = self.get_parsed_fam(entries=entries)
159-
assert np.array_equal(tb[2].parents, [1, 0])
159+
assert np.array_equal(tb[2].parents, [0, 1])
160160

161161
def test_missing_parent_id(self):
162162
# KeyError raised if at least one parent (PAT) does not exist in dataset
@@ -243,7 +243,3 @@ def test_children_before_parents(self, tmp_path):
243243
for row in tb:
244244
tc.individuals.append(row)
245245
tc.tree_sequence() # creating tree sequence should succeed
246-
247-
for idx in range(2):
248-
assert np.array_equal(tb[idx].parents, [-1, -1])
249-
assert np.array_equal(tb[2].parents, [1, 0])

0 commit comments

Comments
 (0)