Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c99f067
Add metadata schema to the python C module
benjeffery Mar 23, 2020
aba4d12
Add metadata_schema to the python C module
benjeffery Mar 23, 2020
b8669fb
Add metadata_schema to python
benjeffery Mar 23, 2020
1cfd97d
fixup! add to other tables
benjeffery Apr 2, 2020
3091248
fixup! remove unused code
benjeffery Apr 2, 2020
5636e7d
fixup! more tests
benjeffery Apr 2, 2020
13f3b1a
fixup! validate and encode on add row
benjeffery Apr 2, 2020
9b2e572
fixup! bulk methods metadata
benjeffery Apr 2, 2020
2329b7a
fixup! dont 'import from'
benjeffery Apr 2, 2020
1940f53
fixup! return decoded metadata
benjeffery Apr 3, 2020
67e5104
fixup! raise a tskit exception
benjeffery Apr 3, 2020
77390d2
fixup! remove decorator smart-assery
benjeffery Apr 3, 2020
744f6a4
fixup! More testing
benjeffery Apr 3, 2020
5d61ed1
fixup! move metadata handling into a module and class
benjeffery Apr 6, 2020
c0ee6dc
fixup! more tests
benjeffery Apr 6, 2020
359acf3
fixup! test for absent metadata
benjeffery Apr 9, 2020
ae404ef
fixup! spike of ts.metadata_schemas
benjeffery Apr 13, 2020
318171f
fixup! TS.metadata_schemas complete
benjeffery Apr 14, 2020
f44625b
fixup! TreeSequene methods return classes with decoded metadata
benjeffery Apr 15, 2020
e53aada
Fix metadata_schema attribute delete
benjeffery Apr 16, 2020
338a045
Change metadata_schema to string from bytes in low-level API
benjeffery Apr 17, 2020
e3f97bd
Add low-level metadata schema tests
benjeffery Apr 17, 2020
d5881fa
fixup! use normal ret checking
benjeffery Apr 17, 2020
267ff13
fixup! copywrite
benjeffery Apr 17, 2020
d5e7eff
fixup! windows compile error fix
benjeffery Apr 17, 2020
290edf4
fixup! correct python c interface code
benjeffery Apr 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
399 changes: 398 additions & 1 deletion python/_tskitmodule.c

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ def make_mutation(id_):
node=node,
derived_state=derived_state,
parent=parent,
metadata=metadata,
encoded_metadata=metadata,
metadata_decoder=tskit.metadata.MetadataSchema.from_str(
ll_ts.get_metadata_schemas().mutation
).decode_row,
)

for j in range(tree_sequence.num_sites):
Expand All @@ -224,7 +227,10 @@ def make_mutation(id_):
position=pos,
ancestral_state=ancestral_state,
mutations=[make_mutation(ll_mut) for ll_mut in ll_mutations],
metadata=metadata,
encoded_metadata=metadata,
metadata_decoder=tskit.metadata.MetadataSchema.from_str(
ll_ts.get_metadata_schemas().site
).decode_row,
)
)

Expand Down
198 changes: 184 additions & 14 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,84 @@ def test_sequence_iteration(self):
self.assertEqual(n.id, 0)


class TestTreeSequenceMetadata(unittest.TestCase):
    """
    Tests that metadata schemas assigned to tables are available from the
    tree sequence, and that rows fetched from it expose decoded metadata.
    """

    # Singular table names; the tests append "s" to form the table attribute
    # and row getter names, and use "num_<name>s" for the row counts.
    metadata_tables = [
        "node",
        "edge",
        "site",
        "mutation",
        "migration",
        "individual",
        "population",
    ]
    metadata_schema = tskit.metadata.MetadataSchema(
        encoding="json",
        schema={
            "title": "Example Metadata",
            "type": "object",
            "properties": {
                "table": {"type": "string"},
                "string_prop": {"type": "string"},
                "num_prop": {"type": "number"},
            },
            "required": ["table", "string_prop", "num_prop"],
            "additionalProperties": False,
        },
    )

    @staticmethod
    def _row_metadata(table, index):
        # Metadata unique to one row of one table, using exactly the three
        # properties that metadata_schema requires.
        return {
            "table": table,
            "string_prop": f"Row number{index}",
            "num_prop": index,
        }

    def test_metadata_schemas(self):
        tables = msprime.simulate(5).dump_tables()
        expected = {
            table: tskit.metadata.MetadataSchema(
                encoding="json", schema={"TEST": f"{table}-SCHEMA"}
            )
            for table in self.metadata_tables
        }
        for table, schema in expected.items():
            getattr(tables, f"{table}s").metadata_schema = schema
        ts = tskit.TreeSequence.load_tables(tables)
        # Every table's own schema must come back from the tree sequence
        for table, schema in expected.items():
            returned = getattr(ts.metadata_schemas, table)
            self.assertEqual(returned.to_str(), schema.to_str())

    def test_metadata_round_trip_via_row_getters(self):
        ts = msprime.simulate(8, random_seed=3, mutation_rate=1)
        self.assertGreater(ts.num_sites, 2)
        new_tables = ts.dump_tables()
        tables_copy = ts.dump_tables()
        for table in self.metadata_tables:
            attr = f"{table}s"
            target = getattr(new_tables, attr)
            target.metadata_schema = self.metadata_schema
            target.clear()
            # Re-add every original row with its metadata replaced by a
            # value unique to that row
            for index, row in enumerate(getattr(tables_copy, attr)):
                fields = dict(zip(row._fields, row))
                fields["metadata"] = self._row_metadata(table, index)
                target.add_row(**fields)
        new_ts = new_tables.tree_sequence()
        # The rewrite must not change how many rows each table has
        for table in self.metadata_tables:
            count_attr = f"num_{table}s"
            self.assertEqual(getattr(new_ts, count_attr), getattr(ts, count_attr))
        # The row getters must hand the metadata back as the original dict
        for table in self.metadata_tables:
            for row in getattr(new_ts, f"{table}s")():
                self.assertDictEqual(
                    row.metadata, self._row_metadata(table, row.id)
                )


class TestPickle(HighLevelTestCase):
"""
Test pickling of a TreeSequence.
Expand Down Expand Up @@ -2437,44 +2515,115 @@ def test_repr(self):
self.assertGreater(len(repr(c)), 0)


class TestIndividualContainer(unittest.TestCase, SimpleContainersMixin):
class SimpleContainersWithMetadataMixin:
    """
    Shared tests for container classes that carry metadata and a decoder.

    The host test case must supply ``get_instances(n)``.
    """

    def test_metadata(self):
        # Reading ``metadata`` yields the decoder's output
        for j, item in enumerate(self.get_instances(5)):
            self.assertEqual(item.metadata, ("x" * j) + "decoded")

        # Swapping the decoder doesn't affect equality
        [first] = self.get_instances(1)
        [second] = self.get_instances(1)
        self.assertTrue(first == second)
        first._metadata_decoder = lambda m: "different decoder"
        self.assertTrue(first == second)

    def test_decoder_run_once(self):
        # For a given instance the decoded metadata should be cached, so
        # the decoder is invoked on the first read only
        [item] = self.get_instances(1)
        calls = []

        def counting_decoder(m):
            calls.append(m)
            return m.decode() + "decoded"

        item._metadata_decoder = counting_decoder
        self.assertEqual(len(calls), 0)
        _ = item.metadata
        self.assertEqual(len(calls), 1)
        _ = item.metadata
        self.assertEqual(len(calls), 1)


class TestIndividualContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [
tskit.Individual(id_=j, flags=j, location=[j], nodes=[j], metadata=b"x" * j)
tskit.Individual(
id_=j,
flags=j,
location=[j],
nodes=[j],
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
)
for j in range(n)
]


class TestNodeContainer(unittest.TestCase, SimpleContainersMixin):
class TestNodeContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [
tskit.Node(
id_=j, flags=j, time=j, population=j, individual=j, metadata=b"x" * j
id_=j,
flags=j,
time=j,
population=j,
individual=j,
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
)
for j in range(n)
]


class TestEdgeContainer(unittest.TestCase, SimpleContainersMixin):
class TestEdgeContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [tskit.Edge(left=j, right=j, parent=j, child=j, id_=j) for j in range(n)]
return [
tskit.Edge(
left=j,
right=j,
parent=j,
child=j,
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
id_=j,
)
for j in range(n)
]


class TestSiteContainer(unittest.TestCase, SimpleContainersMixin):
class TestSiteContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [
tskit.Site(
id_=j,
position=j,
ancestral_state="A" * j,
mutations=TestMutationContainer().get_instances(j),
metadata=b"x" * j,
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
)
for j in range(n)
]


class TestMutationContainer(unittest.TestCase, SimpleContainersMixin):
class TestMutationContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [
tskit.Mutation(
Expand All @@ -2483,23 +2632,44 @@ def get_instances(self, n):
node=j,
derived_state="A" * j,
parent=j,
metadata=b"x" * j,
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
)
for j in range(n)
]


class TestMigrationContainer(unittest.TestCase, SimpleContainersMixin):
class TestMigrationContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [
tskit.Migration(left=j, right=j, node=j, source=j, dest=j, time=j)
tskit.Migration(
left=j,
right=j,
node=j,
source=j,
dest=j,
time=j,
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
)
for j in range(n)
]


class TestPopulationContainer(unittest.TestCase, SimpleContainersMixin):
class TestPopulationContainer(
unittest.TestCase, SimpleContainersMixin, SimpleContainersWithMetadataMixin
):
def get_instances(self, n):
return [tskit.Population(id_=j, metadata="x" * j) for j in range(n)]
return [
tskit.Population(
id_=j,
encoded_metadata=b"x" * j,
metadata_decoder=lambda m: m.decode() + "decoded",
)
for j in range(n)
]


class TestProvenanceContainer(unittest.TestCase, SimpleContainersMixin):
Expand Down
82 changes: 82 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,36 @@ def test_mean_descendants(self):
A = ts.mean_descendants([focal[2:], focal[:2]])
self.assertEqual(A.shape, (ts.get_num_nodes(), 2))

def test_metadata_schemas(self):
    # Whatever schema string is set on each table must be reported by the
    # tree sequence loaded from those tables, including the empty string.
    tables = _tskit.TableCollection(1.0)
    metadata_tables = (
        "node",
        "edge",
        "site",
        "individual",
        "mutation",
        "migration",
        "population",
    )

    def set_schemas_and_load(schema_for):
        # Assign schema_for(name) to every table, then load a fresh tree
        # sequence and return its metadata schemas.
        for table_name in metadata_tables:
            ll_table = getattr(tables, f"{table_name}s")
            ll_table.metadata_schema = schema_for(table_name)
        ll_ts = _tskit.TreeSequence()
        ll_ts.load_tables(tables)
        return ll_ts.get_metadata_schemas()

    schemas = set_schemas_and_load(
        lambda table_name: f"{table_name} test metadata schema"
    )
    for table_name in metadata_tables:
        self.assertEqual(
            getattr(schemas, table_name), f"{table_name} test metadata schema"
        )

    schemas = set_schemas_and_load(lambda table_name: "")
    for table_name in metadata_tables:
        self.assertEqual(getattr(schemas, table_name), "")


class StatsInterfaceMixin:
"""
Expand Down Expand Up @@ -2013,6 +2043,58 @@ def test_map_mutations_errors(self):
self.assertRaises(_tskit.LibraryError, tree.map_mutations, genotypes)


class MetadataTestMixin:
    """
    Supplies the table attribute names shared by the metadata schema tests.
    """

    # Plural attribute names; tests strip the trailing "s" where they need
    # the singular form.
    tables = [
        "nodes",
        "edges",
        "sites",
        "mutations",
        "migrations",
        "individuals",
        "populations",
    ]


class TestTableMetadataSchema(unittest.TestCase, MetadataTestMixin):
    """
    Tests the ``metadata_schema`` attribute of the low-level tables.
    """

    def test_metadata_schema_attribute(self):
        collection = _tskit.TableCollection(1.0)
        example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋"
        for name in self.tables:
            ll_table = getattr(collection, name)
            # A new table reports an empty schema
            self.assertEqual(ll_table.metadata_schema, "")
            # A unicode schema is read back unchanged
            ll_table.metadata_schema = example
            self.assertEqual(ll_table.metadata_schema, example)
            # The attribute cannot be deleted
            with self.assertRaises(ValueError):
                del ll_table.metadata_schema
            # The empty string resets the schema, None is rejected
            ll_table.metadata_schema = ""
            self.assertEqual(ll_table.metadata_schema, "")
            with self.assertRaises(TypeError):
                ll_table.metadata_schema = None


class TestMetadataSchemaNamedTuple(unittest.TestCase, MetadataTestMixin):
    """
    Tests construction and attribute access of ``_tskit.MetadataSchemas``.
    """

    def test_named_tuple_init(self):
        # No argument, an empty sequence and a too-short sequence are
        # all rejected
        for bad_args in ((), ([],), (["test_schema"],)):
            with self.assertRaises(TypeError):
                _tskit.MetadataSchemas(*bad_args)

        expected = tuple(f"{table}_test_schema" for table in self.tables)
        metadata_schemas = _tskit.MetadataSchemas(
            f"{table}_test_schema" for table in self.tables
        )
        # Compares equal to a plain tuple of the same values
        self.assertEqual(metadata_schemas, expected)
        # Fields are named after the singular table name
        for table in self.tables:
            field = table[:-1]
            self.assertEqual(getattr(metadata_schemas, field), f"{table}_test_schema")
        # Fields cannot be reassigned
        for table in self.tables:
            with self.assertRaises(AttributeError):
                setattr(metadata_schemas, table[:-1], "")


class TestModuleFunctions(unittest.TestCase):
"""
Tests for the module level functions.
Expand Down
6 changes: 6 additions & 0 deletions python/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,9 @@ def test_populations(self):
expected = ["mno", ")(*&^%$#@!"]
for a, b in zip(expected, p):
self.assertEqual(a.encode("utf8"), b.metadata)


class TestMetadataSchema(unittest.TestCase):
    """
    Tests that exercise the MetadataSchema class.
    """
Loading