Skip to content

Commit

Permalink
Merge pull request #214 from jonls/unique-id
Browse files Browse the repository at this point in the history
Create unique IDs for exchange/transport reactions
  • Loading branch information
jonls committed Mar 2, 2017
2 parents ee2e9a6 + 79a00e0 commit 8a1b88c
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 74 deletions.
48 changes: 15 additions & 33 deletions psamm/datasource/sbml.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
# You should have received a copy of the GNU General Public License
# along with PSAMM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2014-2016 Jon Lund Steffensen <jon_steffensen@uri.edu>
# Copyright 2014-2017 Jon Lund Steffensen <jon_steffensen@uri.edu>

"""Parser for SBML model files"""
"""Parser for SBML model files."""

from __future__ import unicode_literals

Expand All @@ -33,6 +33,7 @@
from .entry import (CompoundEntry as BaseCompoundEntry,
ReactionEntry as BaseReactionEntry)
from ..reaction import Reaction, Compound, Direction
from ..metabolicmodel import create_exchange_id
from ..expression.boolean import Expression, And, Or, Variable
from .. import util

Expand Down Expand Up @@ -703,24 +704,6 @@ def _make_safe_id(self, id):
id = re.sub(r'[^a-zA-Z0-9_]', '', id)
return id

def _create_unique_id(self, in_dict, orig_id):
"""Creates and returns a unique ID for reaction or compound.
Looks at the ID the thing would have and checks if it is a duplicate
of another in the dictionary and makes a new ID if so.
"""

if orig_id in in_dict:
suffix = 1
while True:
new_id = '{}_{}'.format(
orig_id, suffix)
if new_id not in in_dict:
orig_id = new_id
break
suffix += 1
return orig_id

def _get_flux_bounds(self, r_id, model, flux_limits, equation):
"""Read reaction's limits to set up strings for limits in the output file.
"""
Expand Down Expand Up @@ -768,8 +751,8 @@ def _add_gene_associations(self, r_id, r_genes, gene_ids, r_tag):
gene_tag = ET.SubElement(parent, _tag(
'geneProductRef', FBC_V2))
if current.symbol not in gene_ids:
id = 'g_' + self._create_unique_id(
gene_ids, self._make_safe_id(current.symbol))
id = 'g_' + util.create_unique_id(
self._make_safe_id(current.symbol), gene_ids)
gene_ids[id] = current.symbol
gene_tag.set(_tag('geneProduct', FBC_V2), id)
if isinstance(current, (Or, And)):
Expand Down Expand Up @@ -856,8 +839,8 @@ def write_model(self, file, model, pretty=False):
r.id not in model_reactions):
continue

reaction_id = self._create_unique_id(
reaction_properties, self._make_safe_id(r.id))
reaction_id = util.create_unique_id(
self._make_safe_id(r.id), reaction_properties)
if r.id == model.biomass_reaction:
biomass_id = reaction_id

Expand All @@ -869,10 +852,9 @@ def write_model(self, file, model, pretty=False):
for compound, reaction_id, lower, upper in model.parse_medium():
# Create exchange reaction
if reaction_id is None:
reaction_id = 'EX_{}_{}'.format(
compound.name, compound.compartment)
reaction_id = self._create_unique_id(
reaction_properties, self._make_safe_id(reaction_id))
reaction_id = create_exchange_id(reaction_properties, compound)
reaction_id = util.create_unique_id(
self._make_safe_id(reaction_id), reaction_properties)

reaction_properties[reaction_id] = {
'id': reaction_id,
Expand Down Expand Up @@ -921,15 +903,15 @@ def write_model(self, file, model, pretty=False):
'id': compound.name
}

compound_id = self._create_unique_id(
species_ids, self._make_safe_id(compound.name))
compound_id = util.create_unique_id(
self._make_safe_id(compound.name), species_ids)
model_species[compound] = compound_id
species_ids.add(compound_id)
if compound.compartment not in model_compartments:
model_compartments[
compound.compartment] = 'C_' + self._create_unique_id(
model_compartments, self._make_safe_id(
compound.compartment))
compound.compartment] = 'C_' + util.create_unique_id(
self._make_safe_id(compound.compartment),
model_compartments)

# Create list of compartments
compartments = ET.SubElement(
Expand Down
62 changes: 39 additions & 23 deletions psamm/metabolicmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with PSAMM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2014-2015 Jon Lund Steffensen <jon_steffensen@uri.edu>
# Copyright 2014-2017 Jon Lund Steffensen <jon_steffensen@uri.edu>

"""Representation of metabolic network models."""

Expand All @@ -23,6 +23,18 @@

from .database import MetabolicDatabase, StoichiometricMatrixView
from .reaction import Reaction, Direction
from .util import create_unique_id


def create_exchange_id(existing_ids, compound):
"""Create unique ID for exchange of compound."""
return create_unique_id('EX_{}'.format(compound), existing_ids)


def create_transport_id(existing_ids, compound_1, compound_2):
"""Create unique ID for transport reaction of compounds."""
return create_unique_id(
'TP_{}_{}'.format(compound_1, compound_2), existing_ids)


class FluxBounds(object):
Expand Down Expand Up @@ -282,23 +294,25 @@ def add_all_exchange_reactions(self, compartment, allow_duplicates=False):
all_reactions[rx] = rxnid

added = set()
added_compounds = set()
initial_compounds = set(self.compounds)
for model_compound in initial_compounds:
compound = model_compound.in_compartment(compartment)
rxnid_ex = ('rxnex', compound)
if rxnid_ex in added:
if compound in added_compounds:
continue

if not self._database.has_reaction(rxnid_ex):
reaction_ex = Reaction(Direction.Both, {compound: -1})
if reaction_ex not in all_reactions:
self._database.set_reaction(rxnid_ex, reaction_ex)
else:
rxnid_ex = all_reactions[reaction_ex]
rxnid_ex = create_exchange_id(self._database.reactions, compound)

reaction_ex = Reaction(Direction.Both, {compound: -1})
if reaction_ex not in all_reactions:
self._database.set_reaction(rxnid_ex, reaction_ex)
else:
rxnid_ex = all_reactions[reaction_ex]

if rxnid_ex not in self._reaction_set:
added.add(rxnid_ex)
self.add_reaction(rxnid_ex)
added_compounds.add(compound)

return added

Expand Down Expand Up @@ -331,30 +345,32 @@ def add_all_transport_reactions(self, boundaries, allow_duplicates=False):
boundary_pairs.add(tuple(sorted((source, dest))))

added = set()
added_pairs = set()
initial_compounds = set(self.compounds)
for compound in initial_compounds:
for c1, c2 in boundary_pairs:
compound1 = compound.in_compartment(c1)
compound2 = compound.in_compartment(c2)
pair = compound1, compound2

rxnid_tp = ('rxntp',) + pair
if rxnid_tp in added:
if pair in added_pairs:
continue

if not self._database.has_reaction(rxnid_tp):
reaction_tp = Reaction(Direction.Both, {
compound1: -1,
compound2: 1
})
if reaction_tp not in all_reactions:
self._database.set_reaction(rxnid_tp, reaction_tp)
else:
rxnid_tp = all_reactions[reaction_tp]
rxnid_tp = create_transport_id(
self._database.reactions, compound1, compound2)

reaction_tp = Reaction(Direction.Both, {
compound1: -1,
compound2: 1
})
if reaction_tp not in all_reactions:
self._database.set_reaction(rxnid_tp, reaction_tp)
else:
rxnid_tp = all_reactions[reaction_tp]

if rxnid_tp not in self._reaction_set:
added.add(rxnid_tp)
self.add_reaction(rxnid_tp)
added_pairs.add(pair)

return added

Expand Down Expand Up @@ -402,8 +418,8 @@ def load_model(cls, database, reaction_iter=None, medium=None, limits=None,
for compound, reaction_id, lower, upper in medium:
# Create exchange reaction
if reaction_id is None:
reaction_id = 'EX_{}_{}'.format(
compound.name, compound.compartment)
reaction_id = create_exchange_id(
model.database.reactions, compound)
model.database.set_reaction(
reaction_id, Reaction(Direction.Both, {compound: -1}))
model.add_reaction(reaction_id)
Expand Down
34 changes: 17 additions & 17 deletions psamm/tests/test_fastgapfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with PSAMM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2016 Jon Lund Steffensen <jon_steffensen@uri.edu>
# Copyright 2016-2017 Jon Lund Steffensen <jon_steffensen@uri.edu>
# Copyright 2016 Chao liu <lcddzyx@gmail.com>

import os
Expand Down Expand Up @@ -91,27 +91,27 @@ def test_create_model_extended(self):
'rxn_4',
'rxn_5',
'rxn_6',
('rxntp', Compound('A', 'c'), Compound('A', 'e')),
('rxntp', Compound('B', 'c'), Compound('B', 'e')),
('rxntp', Compound('C', 'c'), Compound('C', 'e')),
('rxntp', Compound('D', 'c'), Compound('D', 'e')),
('rxnex', Compound('A', 'e')),
('rxnex', Compound('B', 'e')),
('rxnex', Compound('C', 'e')),
('rxnex', Compound('D', 'e'))
'TP_A[c]_A[e]',
'TP_B[c]_B[e]',
'TP_C[c]_C[e]',
'TP_D[c]_D[e]',
'EX_A[e]',
'EX_B[e]',
'EX_C[e]',
'EX_D[e]'
])

expected_weights = {
'rxn_3': 5.6,
'rxn_4': 1.0,
('rxntp', Compound('A', 'c'), Compound('A', 'e')): 3.0,
('rxntp', Compound('B', 'c'), Compound('B', 'e')): 3.0,
('rxntp', Compound('C', 'c'), Compound('C', 'e')): 3.0,
('rxntp', Compound('D', 'c'), Compound('D', 'e')): 3.0,
('rxnex', Compound('A', 'e')): 2.0,
('rxnex', Compound('B', 'e')): 2.0,
('rxnex', Compound('C', 'e')): 2.0,
('rxnex', Compound('D', 'e')): 2.0
'TP_A[c]_A[e]': 3.0,
'TP_B[c]_B[e]': 3.0,
'TP_C[c]_C[e]': 3.0,
'TP_D[c]_D[e]': 3.0,
'EX_A[e]': 2.0,
'EX_B[e]': 2.0,
'EX_C[e]': 2.0,
'EX_D[e]': 2.0
}
penalties = {'rxn_3': 5.6}

Expand Down
20 changes: 19 additions & 1 deletion psamm/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with PSAMM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2014-2016 Jon Lund Steffensen <jon_steffensen@uri.edu>
# Copyright 2014-2017 Jon Lund Steffensen <jon_steffensen@uri.edu>

"""Various utilities."""

Expand Down Expand Up @@ -164,6 +164,24 @@ def __len__(self):
return len(self.__d)


def create_unique_id(prefix, existing_ids):
"""Return a unique string ID from the prefix.
First check if the prefix is itself a unique ID in the set-like parameter
existing_ids. If not, try integers in ascending order appended to the
prefix until a unique ID is found.
"""
if prefix in existing_ids:
suffix = 1
while True:
new_id = '{}_{}'.format(prefix, suffix)
if new_id not in existing_ids:
return new_id
suffix += 1

return prefix


def git_try_describe(repo_path):
"""Try to describe the current commit of a Git repository.
Expand Down

0 comments on commit 8a1b88c

Please sign in to comment.