From 34bc9e764db91f47b4ae0bed9396237cb15a3222 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 1 Feb 2021 15:58:04 +0000 Subject: [PATCH] Add docs for individual parents --- docs/data-model.rst | 21 +++++++++++++-------- python/tskit/tables.py | 25 ++++++++++++++++++++++++- python/tskit/trees.py | 5 ++++- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/docs/data-model.rst b/docs/data-model.rst index 3a96673d44..5a18bf58b1 100644 --- a/docs/data-model.rst +++ b/docs/data-model.rst @@ -259,6 +259,7 @@ Column Type Description ================ ============== =========== flags uint32 Bitwise flags. location double Location in arbitrary dimensions +parents int32 Ids of parent individuals metadata binary Individual :ref:`sec_metadata_definition` ================ ============== =========== @@ -281,6 +282,10 @@ so different individuals can have locations with different dimensions (i.e., one individual may have location ``[]`` and another ``[0, 1, 0]``. This could therefore be used to store other quantities (e.g., phenotype). +The ``parents`` column stores the ids of other individuals that are the parents of +an individual. This column is :ref:`ragged <sec_encoding_ragged_columns>` such that an +individual can have any number of parents. + The ``metadata`` column provides a location for client code to store information about each individual. See the :ref:`sec_metadata_definition` section for more details on how metadata columns should be used. @@ -1078,26 +1083,26 @@ Individual text format ====================== The individual text format must contain a ``flags`` column. -Optionally, there may also be a ``location`` and +Optionally, there may also be ``location``, ``parents`` and ``metadata`` columns. See the :ref:`individual table definitions <sec_individual_table_definition>` for details on these columns. Note that there are currently no globally defined ``flags``, but the column is still required; a value of ``0`` means that there are no flags set. 
-The ``location`` column should be a sequence of comma-separated numeric +The ``location`` and ``parents`` columns should each be a sequence of comma-separated numeric values. They do not all have to be the same length. An example individual table:: - flags location - 0 0.5,1.2 - 0 1.0,3.4 + flags location parents + 0 0.5,1.2 -1,-1 + 0 1.0,3.4 0,-1 0 0 1.2 - 0 3.5,6.3 - 0 0.5,0.5 - 0 0.5 + 0 3.5,6.3 1,2 + 0 0.5,0.5 3,4 + 0 0.5 -1,-1 0 0.7,0.6,0.0 0 0.5,0.0 diff --git a/python/tskit/tables.py b/python/tskit/tables.py index a6974e09d3..4c3beca28f 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -457,6 +457,12 @@ class IndividualTable(BaseTable, MetadataMixin): :ivar location_offset: The array of offsets into the location column. See :ref:`sec_encoding_ragged_columns` for more details. :vartype location_offset: numpy.ndarray, dtype=np.uint32 + :ivar parents: The flattened array of parent individual ids. See + :ref:`sec_encoding_ragged_columns` for more details. + :vartype parents: numpy.ndarray, dtype=np.int32 + :ivar parents_offset: The array of offsets into the parents column. See + :ref:`sec_encoding_ragged_columns` for more details. + :vartype parents_offset: numpy.ndarray, dtype=np.uint32 :ivar metadata: The flattened array of binary metadata values. See :ref:`sec_tables_api_binary_columns` for more details. :vartype metadata: numpy.ndarray, dtype=np.int8 @@ -522,6 +528,8 @@ def add_row(self, flags=0, location=None, parents=None, metadata=None): :param array-like location: A list of numeric values or one-dimensional numpy array describing the location of this individual. If not specified or None, a zero-dimensional location is stored. + :param array-like parents: A list or array of ids of parent individuals. If not + specified or None, an empty array is stored. :param object metadata: Any object that is valid metadata for the table's schema. Defaults to the default metadata value for the table's schema. This is typically ``{}``. For no schema, ``None``. 
@@ -555,6 +563,8 @@ def set_columns( the table will contain. The ``location`` and ``location_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns`. + The ``parents`` and ``parents_offset`` parameters must be supplied + together, and meet the requirements for :ref:`sec_encoding_ragged_columns`. The ``metadata`` and ``metadata_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns`. See :ref:`sec_tables_api_binary_columns` for more information and @@ -568,6 +578,12 @@ def set_columns( :type location: numpy.ndarray, dtype=np.float64 :param location_offset: The offsets into the ``location`` array. :type location_offset: numpy.ndarray, dtype=np.uint32. + :param parents: The flattened parents array. Must be specified along + with ``parents_offset``. If not specified or None, an empty parents array + is stored for each individual. + :type parents: numpy.ndarray, dtype=np.int32 + :param parents_offset: The offsets into the ``parents`` array. + :type parents_offset: numpy.ndarray, dtype=np.uint32. :param metadata: The flattened metadata array. Must be specified along with ``metadata_offset``. If not specified or None, an empty metadata value is stored for each individual. @@ -606,6 +622,8 @@ def append_columns( The ``flags`` array is mandatory and defines the number of extra individuals to add to the table. + The ``parents`` and ``parents_offset`` parameters must be supplied + together, and meet the requirements for :ref:`sec_encoding_ragged_columns`. The ``location`` and ``location_offset`` parameters must be supplied together, and meet the requirements for :ref:`sec_encoding_ragged_columns`. The ``metadata`` and ``metadata_offset`` parameters must be supplied @@ -624,6 +642,11 @@ def append_columns( :param metadata: The flattened metadata array. Must be specified along with ``metadata_offset``. 
If not specified or None, an empty metadata value is stored for each individual. + :param parents: The flattened parents array. Must be specified along + with ``parents_offset``. If not specified or None, an empty parents array + is stored for each individual. + :type parents: numpy.ndarray, dtype=np.int32 + :param parents_offset: The offsets into the ``parents`` array. :type metadata: numpy.ndarray, dtype=np.int8 :param metadata_offset: The offsets into the ``metadata`` array. :type metadata_offset: numpy.ndarray, dtype=np.uint32. @@ -663,7 +686,7 @@ def packset_parents(self, parents): must be equal to the number of rows in the table. :param list parents: A list of list of parent ids, interpreted as numpy int32 - arrays + arrays. """ packed, offset = util.pack_arrays(parents, np.int32) d = self.asdict() diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 879f5d82a6..7f22a7c162 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -147,10 +147,13 @@ class Individual(SimpleContainerWithMetadata): :ivar location: The spatial location of this individual as a numpy array. The location is an empty array if no spatial location is defined. :vartype location: numpy.ndarray + :ivar parents: The parent individual ids of this individual as a numpy array. The + parents array is empty if no parents are defined. + :vartype parents: numpy.ndarray :ivar nodes: The IDs of the nodes that are associated with this individual as a numpy array (dtype=np.int32). If no nodes are associated with the individual this array will be empty. - :vartype location: numpy.ndarray + :vartype nodes: numpy.ndarray :ivar metadata: The decoded :ref:`metadata <sec_metadata_definition>` for this individual. :vartype metadata: object