Skip to content

Commit d13efbe

Browse files
committed
Add table collection index to LWT interface
1 parent f4a9523 commit d13efbe

File tree

10 files changed

+198
-4
lines changed

10 files changed

+198
-4
lines changed

c/CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
criteria (e.g., ignore top-level metadata and schema or provenance tables).
99
(:user:`mufernando`, :issue:`896`, :pr:`897`)
1010

11+
- Exposed ``tsk_table_collection_set_index`` to the API.
12+
(:user:`benjeffery`, :issue:`870`, :pr:`921`)
13+
1114
---------------------
1215
[0.99.7] - 2020-09-29
1316
---------------------

c/tskit/tables.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7783,7 +7783,7 @@ tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,
77837783
metadata_schema, metadata_schema_length);
77847784
}
77857785

7786-
static int
7786+
int
77877787
tsk_table_collection_set_index(tsk_table_collection_t *self,
77887788
tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order)
77897789
{

c/tskit/tables.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2721,6 +2721,26 @@ collection. Any existing index is first dropped using
27212721
*/
27222722
int tsk_table_collection_build_index(tsk_table_collection_t *self, tsk_flags_t options);
27232723

2724+
/**
2725+
@brief Sets the edge insertion/removal index for this table collection.
2726+
2727+
@rst
2728+
This method sets the edge insertion/removal index for this table collection.
2729+
The index arrays should each have as many entries as there are edges in the
2730+
edge table. The index is not checked for validity.
2731+
2732+
See the :ref:`sec_c_api_table_indexes` section for details on the index
2733+
life-cycle.
2734+
@endrst
2735+
2736+
@param self A pointer to a tsk_table_collection_t object.
2737+
@param edge_insertion_order Array of tsk_id_t edge ids.
2738+
@param edge_removal_order Array of tsk_id_t edge ids.
2739+
@return Return 0 on success or a negative value on failure.
2740+
*/
2741+
int tsk_table_collection_set_index(tsk_table_collection_t *self,
2742+
tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order);
2743+
27242744
/**
27252745
@brief Runs integrity checks on this table collection.
27262746

python/CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
- Added ``TableCollection.index`` for access to the edge insertion/removal order indexes.
1818
(:user:`benjeffery`, :issue:`4`, :pr:`916`)
1919

20+
- The dictionary representation of a TableCollection now contains its index.
21+
(:user:`benjeffery`, :issue:`870`, :pr:`921`)
22+
2023
**Breaking changes**
2124

2225
- The argument to ``ts.dump`` and ``tskit.load`` has been renamed from ``path`` to ``file``.

python/lwt_interface/CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
--------------------
2+
[0.1.2] - 2020-10-22
3+
--------------------
4+
5+
- Added optional top-level key ``index`` which contains ``edge_insertion_order`` and
6+
``edge_removal_order``

python/lwt_interface/dict_encoding_testlib.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def get_example_tables():
148148
class TestEncodingVersion:
149149
def test_version(self):
150150
lwt = lwt_module.LightweightTableCollection()
151-
assert lwt.asdict()["encoding_version"] == (1, 1)
151+
assert lwt.asdict()["encoding_version"] == (1, 2)
152152

153153

154154
class TestRoundTrip:
@@ -270,6 +270,7 @@ def test_missing_tables(self):
270270
"metadata",
271271
"metadata_schema",
272272
"encoding_version",
273+
"index",
273274
}
274275
for table_name in table_names:
275276
d = tables.asdict()
@@ -292,6 +293,7 @@ def verify_columns(self, value):
292293
"metadata",
293294
"metadata_schema",
294295
"encoding_version",
296+
"index",
295297
}
296298
for table_name in table_names:
297299
table_dict = d[table_name]
@@ -336,6 +338,7 @@ def verify(self, num_rows):
336338
"metadata",
337339
"metadata_schema",
338340
"encoding_version",
341+
"index",
339342
}
340343
for table_name in sorted(table_names):
341344
table_dict = d[table_name]
@@ -354,6 +357,30 @@ def test_two_rows(self):
354357
def test_zero_rows(self):
355358
self.verify(0)
356359

360+
def test_bad_index_length(self):
361+
tables = get_example_tables()
362+
for col in ("insertion", "removal"):
363+
d = tables.asdict()
364+
d["index"][f"edge_{col}_order"] = d["index"][f"edge_{col}_order"][:-1]
365+
lwt = lwt_module.LightweightTableCollection()
366+
with pytest.raises(
367+
ValueError,
368+
match="^edge_insertion_order and"
369+
" edge_removal_order must be the same"
370+
" length$",
371+
):
372+
lwt.fromdict(d)
373+
d = tables.asdict()
374+
for col in ("insertion", "removal"):
375+
d["index"][f"edge_{col}_order"] = d["index"][f"edge_{col}_order"][:-1]
376+
lwt = lwt_module.LightweightTableCollection()
377+
with pytest.raises(
378+
ValueError,
379+
match="^edge_insertion_order and edge_removal_order must be"
380+
" the same length as the number of edges$",
381+
):
382+
lwt.fromdict(d)
383+
357384

358385
class TestRequiredAndOptionalColumns:
359386
"""
@@ -563,6 +590,38 @@ def test_provenances(self):
563590
["record", "record_offset", "timestamp", "timestamp_offset"],
564591
)
565592

593+
def test_index(self):
594+
tables = get_example_tables()
595+
d = tables.asdict()
596+
lwt = lwt_module.LightweightTableCollection()
597+
lwt.fromdict(d)
598+
other = lwt.asdict()
599+
assert np.array_equal(
600+
d["index"]["edge_insertion_order"], other["index"]["edge_insertion_order"]
601+
)
602+
assert np.array_equal(
603+
d["index"]["edge_removal_order"], other["index"]["edge_removal_order"]
604+
)
605+
606+
# index is optional
607+
d = tables.asdict()
608+
del d["index"]
609+
lwt = lwt_module.LightweightTableCollection()
610+
lwt.fromdict(d)
611+
612+
# Both columns must be provided, if one is
613+
for col in ("insertion", "removal"):
614+
d = tables.asdict()
615+
del d["index"][f"edge_{col}_order"]
616+
lwt = lwt_module.LightweightTableCollection()
617+
with pytest.raises(
618+
TypeError,
619+
match="^edge_insertion_order and "
620+
"edge_removal_order must be specified "
621+
"together$",
622+
):
623+
lwt.fromdict(d)
624+
566625
def test_top_level_metadata(self):
567626
tables = get_example_tables()
568627
d = tables.asdict()

python/lwt_interface/tskit_lwt_interface.h

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,69 @@ parse_provenance_table_dict(
11721172
return ret;
11731173
}
11741174

1175+
static int
1176+
parse_index_dict(tsk_table_collection_t *tables, PyObject *dict)
1177+
{
1178+
int err;
1179+
int ret = -1;
1180+
size_t insertion_length, removal_length;
1181+
PyObject *insertion_input = NULL;
1182+
PyArrayObject *insertion_array = NULL;
1183+
PyObject *removal_input = NULL;
1184+
PyArrayObject *removal_array = NULL;
1185+
1186+
/* Get the inputs */
1187+
insertion_input = get_table_dict_value(dict, "edge_insertion_order", false);
1188+
if (insertion_input == NULL) {
1189+
goto out;
1190+
}
1191+
removal_input = get_table_dict_value(dict, "edge_removal_order", false);
1192+
if (removal_input == NULL) {
1193+
goto out;
1194+
}
1195+
1196+
if ((insertion_input == Py_None) != (removal_input == Py_None)) {
1197+
PyErr_SetString(PyExc_TypeError,
1198+
"edge_insertion_order and edge_removal_order must be specified together");
1199+
goto out;
1200+
}
1201+
1202+
if (insertion_input != Py_None) {
1203+
insertion_array = table_read_column_array(
1204+
insertion_input, NPY_INT32, &insertion_length, false);
1205+
if (insertion_array == NULL) {
1206+
goto out;
1207+
}
1208+
removal_array
1209+
= table_read_column_array(removal_input, NPY_INT32, &removal_length, false);
1210+
if (removal_array == NULL) {
1211+
goto out;
1212+
}
1213+
if (insertion_length != removal_length) {
1214+
PyErr_SetString(PyExc_ValueError,
1215+
"edge_insertion_order and edge_removal_order must be the same length");
1216+
goto out;
1217+
}
1218+
if (insertion_length != tables->edges.num_rows) {
1219+
PyErr_SetString(PyExc_ValueError,
1220+
"edge_insertion_order and edge_removal_order must be "
1221+
"the same length as the number of edges");
1222+
goto out;
1223+
}
1224+
err = tsk_table_collection_set_index(
1225+
tables, PyArray_DATA(insertion_array), PyArray_DATA(removal_array));
1226+
if (err != 0) {
1227+
handle_tskit_error(err);
1228+
goto out;
1229+
}
1230+
}
1231+
ret = 0;
1232+
out:
1233+
Py_XDECREF(insertion_array);
1234+
Py_XDECREF(removal_array);
1235+
return ret;
1236+
}
1237+
11751238
static int
11761239
parse_table_collection_dict(tsk_table_collection_t *tables, PyObject *tables_dict)
11771240
{
@@ -1339,6 +1402,21 @@ parse_table_collection_dict(tsk_table_collection_t *tables, PyObject *tables_dic
13391402
goto out;
13401403
}
13411404

1405+
/* index */
1406+
value = get_table_dict_value(tables_dict, "index", false);
1407+
if (value == NULL) {
1408+
goto out;
1409+
}
1410+
if (value != Py_None) {
1411+
if (!PyDict_Check(value)) {
1412+
PyErr_SetString(PyExc_TypeError, "not a dictionary");
1413+
goto out;
1414+
}
1415+
if (parse_index_dict(tables, value) != 0) {
1416+
goto out;
1417+
}
1418+
}
1419+
13421420
ret = 0;
13431421
out:
13441422
return ret;
@@ -1480,6 +1558,14 @@ write_table_arrays(tsk_table_collection_t *tables, PyObject *dict)
14801558
{ NULL },
14811559
};
14821560

1561+
struct table_col index_cols[] = {
1562+
{ "edge_insertion_order", (void *) tables->indexes.edge_insertion_order,
1563+
tables->indexes.num_edges, NPY_INT32 },
1564+
{ "edge_removal_order", (void *) tables->indexes.edge_removal_order,
1565+
tables->indexes.num_edges, NPY_INT32 },
1566+
{ NULL },
1567+
};
1568+
14831569
struct table_desc table_descs[] = {
14841570
{ "individuals", individual_cols, tables->individuals.metadata_schema,
14851571
tables->individuals.metadata_schema_length },
@@ -1496,6 +1582,7 @@ write_table_arrays(tsk_table_collection_t *tables, PyObject *dict)
14961582
{ "populations", population_cols, tables->populations.metadata_schema,
14971583
tables->populations.metadata_schema_length },
14981584
{ "provenances", provenance_cols, NULL, 0 },
1585+
{ "index", index_cols, NULL, 0 },
14991586
};
15001587

15011588
for (j = 0; j < sizeof(table_descs) / sizeof(*table_descs); j++) {
@@ -1557,7 +1644,7 @@ dump_tables_dict(tsk_table_collection_t *tables)
15571644
}
15581645

15591646
/* Dict representation version */
1560-
val = Py_BuildValue("ll", 1, 1);
1647+
val = Py_BuildValue("ll", 1, 2);
15611648
if (val == NULL) {
15621649
goto out;
15631650
}

python/tests/test_tables.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,16 @@ def test_add_row_bad_data(self):
13091309
t.add_row(metadata=[0])
13101310

13111311

1312+
class TestTableCollectionIndex:
1313+
def test_index(self):
1314+
i = np.arange(20)
1315+
r = np.arange(20)[::-1]
1316+
index = tskit.TableCollectionIndex(edge_insertion_order=i, edge_removal_order=r)
1317+
assert index.edge_insertion_order is i
1318+
assert index.edge_removal_order is r
1319+
assert index.asdict() == {"edge_insertion_order": i, "edge_removal_order": r}
1320+
1321+
13121322
class TestSortTables:
13131323
"""
13141324
Tests for the TableCollection.sort() method.
@@ -2254,6 +2264,7 @@ def test_asdict(self):
22542264
"mutations": t.mutations.asdict(),
22552265
"migrations": t.migrations.asdict(),
22562266
"provenances": t.provenances.asdict(),
2267+
"index": t.index.asdict(),
22572268
}
22582269
d2 = t.asdict()
22592270
assert set(d1.keys()) == set(d2.keys())
@@ -2278,6 +2289,7 @@ def test_from_dict(self):
22782289
"mutations": t1.mutations.asdict(),
22792290
"migrations": t1.migrations.asdict(),
22802291
"provenances": t1.provenances.asdict(),
2292+
"index": t1.index.asdict(),
22812293
}
22822294
t2 = tskit.TableCollection.fromdict(d)
22832295
assert t1 == t2

python/tskit/tables.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ class TableCollectionIndex:
145145
edge_insertion_order: np.ndarray
146146
edge_removal_order: np.ndarray
147147

148+
def asdict(self):
149+
return attr.asdict(self)
150+
148151

149152
def keep_with_offset(keep, data, offset):
150153
"""
@@ -2110,6 +2113,7 @@ def asdict(self):
21102113
"mutations": self.mutations.asdict(),
21112114
"populations": self.populations.asdict(),
21122115
"provenances": self.provenances.asdict(),
2116+
"index": self.index.asdict(),
21132117
}
21142118

21152119
@property

python/tskit/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def intervals_to_np_array(intervals, start, end):
248248
def negate_intervals(intervals, start, end):
249249
"""
250250
Returns the set of intervals *not* covered by the specified set of
251-
disjoint intervals in the specfied range.
251+
disjoint intervals in the specified range.
252252
"""
253253
intervals = intervals_to_np_array(intervals, start, end)
254254
other_intervals = []

0 commit comments

Comments
 (0)