Skip to content

Commit bc0bc4f

Browse files
committed
Don't sort individuals by default
1 parent 6142f7f commit bc0bc4f

File tree

14 files changed

+128
-53
lines changed

14 files changed

+128
-53
lines changed

c/CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,15 @@
1616

1717
- FIXME breaking changes for tree API and virtual root
1818

19+
- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.
20+
``tsk_table_collection_sort`` no longer sorts individuals.
21+
(:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
22+
1923
**Features**
2024

25+
- Add ``tsk_table_collection_individual_topological_sort`` to sort the individuals as this is no longer done by the
26+
default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
27+
2128
- The default behaviour for table size growth is now to double the current size of the table,
2229
up to a threshold. To keep the previous behaviour, use (e.g.)
2330
``tsk_edge_table_set_max_rows_increment(tables->edges, 1024)``, which results in adding

c/tests/test_tables.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6280,10 +6280,10 @@ test_sort_tables_offsets(void)
62806280
CU_ASSERT_EQUAL_FATAL(ret, 0);
62816281
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
62826282

6283-
/* Check that sorting would have had an effect */
6283+
/* Check that sorting would have had no effect as individuals not in default sort*/
62846284
ret = tsk_table_collection_sort(&tables, NULL, 0);
62856285
CU_ASSERT_EQUAL_FATAL(ret, 0);
6286-
CU_ASSERT_FALSE(tsk_table_collection_equals(&tables, &copy, 0));
6286+
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
62876287

62886288
tsk_memset(&bookmark, 0, sizeof(bookmark));
62896289
bookmark.individuals = tables.individuals.num_rows - 1;
@@ -6802,17 +6802,28 @@ test_sort_tables_individuals(void)
68026802
const char *individuals_cycle = "1 0.2 2 0\n"
68036803
"2 0.5 0 1\n"
68046804
"3 0.3 1 2\n";
6805+
const tsk_id_t bad_parents[] = { 200 };
6806+
tsk_id_t ret_id;
68056807

68066808
ret = tsk_table_collection_init(&tables, 0);
68076809
CU_ASSERT_EQUAL_FATAL(ret, 0);
68086810
tables.sequence_length = 1.0;
68096811
parse_individuals(individuals, &tables.individuals);
6812+
6813+
ret = tsk_table_collection_copy(&tables, &copy, 0);
6814+
CU_ASSERT_EQUAL_FATAL(ret, 0);
6815+
6816+
/* Table sort doesn't touch individuals by default*/
6817+
ret = tsk_table_collection_sort(&tables, NULL, 0);
6818+
CU_ASSERT_EQUAL_FATAL(ret, 0);
6819+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));
6820+
68106821
/* Not calling with TSK_CHECK_TREES so casting is safe */
68116822
ret = (int) tsk_table_collection_check_integrity(
68126823
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
68136824
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
68146825

6815-
ret = tsk_table_collection_sort(&tables, NULL, 0);
6826+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
68166827
CU_ASSERT_EQUAL_FATAL(ret, 0);
68176828
ret = (int) tsk_table_collection_check_integrity(
68186829
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
@@ -6821,16 +6832,21 @@ test_sort_tables_individuals(void)
68216832
/* Check that the sort is stable */
68226833
ret = tsk_table_collection_copy(&tables, &copy, 0);
68236834
CU_ASSERT_EQUAL_FATAL(ret, 0);
6835+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
6836+
CU_ASSERT_EQUAL_FATAL(ret, 0);
68246837
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));
68256838

6826-
ret = tsk_table_collection_sort(&tables, NULL, 0);
6827-
CU_ASSERT_EQUAL_FATAL(ret, 0);
6828-
CU_ASSERT(tsk_table_collection_equals(&tables, &copy, 0));
6839+
/* Errors on bad table */
6840+
ret_id = tsk_individual_table_add_row(
6841+
&tables.individuals, 0, NULL, 0, bad_parents, 1, NULL, 0);
6842+
CU_ASSERT_EQUAL_FATAL(ret_id, 6);
6843+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
6844+
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
68296845

68306846
/* Errors on cycle */
68316847
tsk_individual_table_clear(&tables.individuals);
68326848
parse_individuals(individuals_cycle, &tables.individuals);
6833-
ret = tsk_table_collection_sort(&tables, NULL, 0);
6849+
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
68346850
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);
68356851

68366852
tsk_table_collection_free(&tables);

c/tests/test_trees.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2077,10 +2077,10 @@ test_simplest_bad_individuals(void)
20772077
tsk_treeseq_free(&ts);
20782078
tables.individuals.parents[0] = TSK_NULL;
20792079

2080-
/* Unsorted individuals */
2080+
/* Unsorted individuals are OK*/
20812081
tables.individuals.parents[0] = 1;
20822082
ret = tsk_treeseq_init(&ts, &tables, load_flags);
2083-
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
2083+
CU_ASSERT_EQUAL(ret, 0);
20842084
tsk_treeseq_free(&ts);
20852085
tables.individuals.parents[0] = TSK_NULL;
20862086

c/tskit/tables.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6143,15 +6143,16 @@ tsk_individual_table_topological_sort(
61436143
return ret;
61446144
}
61456145

6146-
static int
6147-
tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
6146+
int
6147+
tsk_table_collection_individual_topological_sort(
6148+
tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
61486149
{
61496150
int ret = 0;
61506151
tsk_id_t i, ret_id;
61516152
tsk_individual_table_t copy;
61526153
tsk_individual_t individual;
6153-
tsk_individual_table_t *individuals = &self->tables->individuals;
6154-
tsk_node_table_t *nodes = &self->tables->nodes;
6154+
tsk_individual_table_t *individuals = &self->individuals;
6155+
tsk_node_table_t *nodes = &self->nodes;
61556156
tsk_size_t num_individuals = individuals->num_rows;
61566157
tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));
61576158
tsk_id_t *new_id_map = tsk_malloc(num_individuals * sizeof(*new_id_map));
@@ -6167,6 +6168,12 @@ tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
61676168
goto out;
61686169
}
61696170

6171+
ret_id = tsk_table_collection_check_integrity(self, 0);
6172+
if (ret_id != 0) {
6173+
ret = (int) ret_id;
6174+
goto out;
6175+
}
6176+
61706177
ret = tsk_individual_table_clear(individuals);
61716178
if (ret != 0) {
61726179
goto out;
@@ -6379,7 +6386,7 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
63796386
goto out;
63806387
}
63816388
}
6382-
if (!skip_individuals) {
6389+
if (!skip_individuals && self->sort_individuals != NULL) {
63836390
ret = self->sort_individuals(self);
63846391
if (ret != 0) {
63856392
goto out;
@@ -6415,7 +6422,8 @@ tsk_table_sorter_init(
64156422
/* Set the sort_edges and sort_mutations methods to the default. */
64166423
self->sort_edges = tsk_table_sorter_sort_edges;
64176424
self->sort_mutations = tsk_table_sorter_sort_mutations;
6418-
self->sort_individuals = tsk_table_sorter_sort_individuals;
6425+
/* Default sort doesn't touch individuals */
6426+
self->sort_individuals = NULL;
64196427
out:
64206428
return ret;
64216429
}
@@ -9722,11 +9730,10 @@ tsk_table_collection_check_integrity(
97229730
tsk_id_t ret = 0;
97239731

97249732
if (options & TSK_CHECK_TREES) {
9725-
/* Checking the trees implies all the other checks */
9733+
/* Checking the trees implies these checks */
97269734
options |= TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING
97279735
| TSK_CHECK_SITE_DUPLICATES | TSK_CHECK_MUTATION_ORDERING
9728-
| TSK_CHECK_INDIVIDUAL_ORDERING | TSK_CHECK_MIGRATION_ORDERING
9729-
| TSK_CHECK_INDEXES;
9736+
| TSK_CHECK_MIGRATION_ORDERING | TSK_CHECK_INDEXES;
97309737
}
97319738

97329739
if (self->sequence_length <= 0) {

c/tskit/tables.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3672,17 +3672,33 @@ TSK_NO_CHECK_INTEGRITY
36723672
36733673
@endrst
36743674
3675-
@param self A pointer to a tsk_individual_table_t object.
3675+
@param self A pointer to a tsk_table_collection_t object.
36763676
@param start The position to begin sorting in each table; all rows less than this
36773677
position must fulfill the tree sequence sortedness requirements. If this is
36783678
NULL, sort all rows.
3679-
@param options Sort options. Currently unused; should be
3680-
set to zero to ensure compatibility with later versions of tskit.
3679+
@param options Sort options.
36813680
@return Return 0 on success or a negative value on failure.
36823681
*/
36833682
int tsk_table_collection_sort(
36843683
tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options);
36853684

3685+
/**
3686+
@brief Sorts the individual table in this collection.
3687+
3688+
@rst
3689+
Sorts the individual table in place, so that parents come before children,
3690+
and the parent column is remapped as required. Node references to individuals
3691+
are also updated.
3692+
@endrst
3693+
3694+
@param self A pointer to a tsk_table_collection_t object.
3695+
@param options Sort options. Currently unused; should be
3696+
set to zero to ensure compatibility with later versions of tskit.
3697+
@return Return 0 on success or a negative value on failure.
3698+
*/
3699+
int tsk_table_collection_individual_topological_sort(
3700+
tsk_table_collection_t *self, tsk_flags_t options);
3701+
36863702
/**
36873703
@brief Puts the tables into canonical form.
36883704

docs/data-model.rst

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -566,13 +566,8 @@ Individual requirements
566566
-----------------------
567567

568568
Individuals are a basic type in a tree sequence and are not defined with
569-
respect to any other tables. Individuals can have a reference to their parent
570-
individuals, if present these references must be valid or null (-1).
571-
572-
Individuals must be sorted such that parents are before children.
573-
Sorting a set of tables using :meth:`TableCollection.sort` will ensure that
574-
this requirement is fulfilled.
575-
569+
respect to any other tables. Individuals can have a reference to any number of
570+
their parent individuals, if present these references must be valid or null (-1).
576571

577572
.. _sec_node_requirements:
578573

python/CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,15 @@
2727
are multiple roots. Previously orders were defined locally for each root, but
2828
are now globally across all roots. (:user:`jeromekelleher`, :pr:`1704`).
2929

30+
- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.
31+
``TableCollection.sort`` no longer sorts individuals.
32+
(:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
3033

3134
**Features**
3235

36+
- Add ``TableCollection.sort_individuals`` to sort the individuals as this is no longer done by the
37+
default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
38+
3339
- Add ``__setitem__`` to all tables allowing single rows to be updated. For example
3440
``tables.nodes[0] = tables.nodes[0].replace(flags=tskit.NODE_IS_SAMPLE)``
3541
(:user:`jeromekelleher`, :user:`benjeffery`, :issue:`1545`, :pr:`1600`).

python/_tskitmodule.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6632,6 +6632,26 @@ TableCollection_sort(TableCollection *self, PyObject *args, PyObject *kwds)
66326632
return ret;
66336633
}
66346634

6635+
static PyObject *
6636+
TableCollection_sort_individuals(TableCollection *self, PyObject *args, PyObject *kwds)
6637+
{
6638+
int err;
6639+
PyObject *ret = NULL;
6640+
6641+
if (TableCollection_check_state(self) != 0) {
6642+
goto out;
6643+
}
6644+
6645+
err = tsk_table_collection_individual_topological_sort(self->tables, 0);
6646+
if (err != 0) {
6647+
handle_library_error(err);
6648+
goto out;
6649+
}
6650+
ret = Py_BuildValue("");
6651+
out:
6652+
return ret;
6653+
}
6654+
66356655
static PyObject *
66366656
TableCollection_canonicalise(TableCollection *self, PyObject *args, PyObject *kwds)
66376657
{
@@ -7115,6 +7135,10 @@ static PyMethodDef TableCollection_methods[] = {
71157135
.ml_meth = (PyCFunction) TableCollection_sort,
71167136
.ml_flags = METH_VARARGS | METH_KEYWORDS,
71177137
.ml_doc = "Sorts the tables to satisfy tree sequence requirements." },
7138+
{ .ml_name = "sort_individuals",
7139+
.ml_meth = (PyCFunction) TableCollection_sort_individuals,
7140+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
7141+
.ml_doc = "Sorts the individual table topologically" },
71187142
{ .ml_name = "canonicalise",
71197143
.ml_meth = (PyCFunction) TableCollection_canonicalise,
71207144
.ml_flags = METH_VARARGS | METH_KEYWORDS,

python/tests/data/svg/ts_multiroot.svg

Lines changed: 10 additions & 10 deletions
Loading

python/tests/test_lowlevel.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,10 @@ def test_fromdict_bad_args(self):
400400
with pytest.raises(TypeError):
401401
tc.fromdict(bad_type)
402402

403+
def test_sort_individuals(self):
404+
tc = _tskit.TableCollection(0)
405+
tc.sort_individuals()
406+
403407

404408
class TestIbd:
405409
def test_uninitialised(self):

0 commit comments

Comments
 (0)