Skip to content

Commit

Permalink
Merge pull request #202 from jonls/duplicates-check
Browse files Browse the repository at this point in the history
Duplicates check
  • Loading branch information
jonls committed Feb 24, 2017
2 parents 86553e5 + 52c6dd3 commit e807aa4
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 0 deletions.
14 changes: 14 additions & 0 deletions docs/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,20 @@ constraints are imposed when considering whether reactions can take a non-zero
flux. This automatically removes internal flux loops but is also much more
time-consuming.

Reaction duplicates check (``duplicatescheck``)
-----------------------------------------------

This command simply checks whether multiple reactions exist in the model that
have the same or similar reaction equations. By default, this check will ignore
reaction directionality and stoichiometric values when considering whether
reactions are identical. The options ``--compare-direction`` and
``--compare-stoichiometry`` can be used to make the command consider these
properties as well.

.. code-block:: shell

    $ psamm-model duplicatescheck

Gap check (``gapcheck``)
------------------------

Expand Down
98 changes: 98 additions & 0 deletions psamm/commands/duplicatescheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# This file is part of PSAMM.
#
# PSAMM is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PSAMM is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PSAMM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2014-2017 Jon Lund Steffensen <jon_steffensen@uri.edu>

from __future__ import unicode_literals

import logging

from six import itervalues

from ..command import Command

logger = logging.getLogger(__name__)


def reaction_signature(eq, direction=False, stoichiometry=False):
    """Return unique signature object for :class:`Reaction`.

    Signature objects are hashable, and compare equal only if the reactions
    are considered the same according to the specified rules. The two sides
    of the equation are put into a canonical order so that a reaction and
    its mirror image (sides swapped and direction flipped) produce the same
    signature.

    Args:
        eq: Reaction equation. Its ``left`` and ``right`` attributes are
            iterated as ``(compound, value)`` pairs and its ``direction``
            attribute must provide a ``flipped()`` method. Compounds (and
            values, if ``stoichiometry`` is set) must be sortable, and the
            direction must be hashable for the signature to be hashable.
        direction: Include reaction directionality when considering equality.
        stoichiometry: Include stoichiometry when considering equality.

    Returns:
        A pair of sorted tuples describing the two sides in canonical
        order. If ``direction`` is true, a pair consisting of that value
        and a direction signature is returned instead.
    """
    def compounds_sig(compounds):
        # Sorting makes the signature independent of the order in which
        # the compounds are listed on one side of the equation.
        if stoichiometry:
            return tuple(sorted(compounds))
        return tuple(sorted(compound for compound, _ in compounds))

    left = compounds_sig(eq.left)
    right = compounds_sig(eq.right)

    if left < right:
        reaction_sig = left, right
        direction_sig = eq.direction
    elif left == right:
        # Both sides are indistinguishable under the current rules, so
        # swapping them changes nothing and the reaction is its own mirror
        # image. The direction and its flipped counterpart then describe
        # the same reaction; use an orientation-independent signature so
        # that both spellings compare equal.
        reaction_sig = left, right
        direction_sig = frozenset((eq.direction, eq.direction.flipped()))
    else:
        # The sides are swapped to reach the canonical order, so the
        # direction has to be flipped to keep describing the same reaction.
        reaction_sig = right, left
        direction_sig = eq.direction.flipped()

    if direction:
        return reaction_sig, direction_sig
    return reaction_sig


class DuplicatesCheck(Command):
    """Check for duplicated reactions in the model.

    This command reports any reactions in the model that appear more than
    once. Stoichiometry and reaction directionality are by default
    disregarded when checking for duplicates but can be enabled using the
    options.
    """

    @classmethod
    def init_parser(cls, parser):
        """Add the options of this command to the argument parser.

        Args:
            parser: Argument parser object that the ``--compare-direction``
                and ``--compare-stoichiometry`` flags are added to before
                it is handed on to the parent class.
        """
        parser.add_argument(
            '--compare-direction', action='store_true',
            help='Take reaction directionality into consideration.')
        parser.add_argument(
            '--compare-stoichiometry', action='store_true',
            help='Take stoichiometry into consideration.')
        super(DuplicatesCheck, cls).init_parser(parser)

    def run(self):
        """Run check for duplicates.

        Reactions returned by ``self._model.parse_reactions()`` are grouped
        by their signature, and every group with more than one member is
        printed along with the file mark of each reaction, when available.
        """

        # Create dictionary of signatures
        database_signatures = {}
        for entry in self._model.parse_reactions():
            signature = reaction_signature(
                entry.equation, direction=self._args.compare_direction,
                stoichiometry=self._args.compare_stoichiometry)
            database_signatures.setdefault(signature, set()).add(
                (entry.id, entry.equation, entry.filemark))

        for reaction_set in itervalues(database_signatures):
            if len(reaction_set) <= 1:
                continue

            print('Found {} duplicate reactions:'.format(
                len(reaction_set)))

            # Sets have no defined iteration order; sort the members by the
            # textual form of their ID so that the report is the same from
            # one run to the next.
            ordered = sorted(
                reaction_set, key=lambda item: '{}'.format(item[0]))
            for reaction, equation, filemark in ordered:
                result = ' - {}: {}'.format(reaction, equation)
                if filemark is not None:
                    result += ' (found in {})'.format(filemark)
                print(result)
10 changes: 10 additions & 0 deletions psamm/tests/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from psamm.datasource.native import NativeModel

from psamm.commands.chargecheck import ChargeBalanceCommand
from psamm.commands.duplicatescheck import DuplicatesCheck
from psamm.commands.excelexport import ExcelExportCommand
from psamm.commands.fastgapfill import FastGapFillCommand
from psamm.commands.fba import FluxBalanceCommand
Expand Down Expand Up @@ -239,6 +240,15 @@ def test_invoke_version(self):
def test_run_chargecheck(self):
    # Invoke ChargeBalanceCommand through self.run_command without any
    # additional command-line arguments.
    command_class = ChargeBalanceCommand
    self.run_command(command_class)

def test_run_duplicatescheck(self):
    # Invoke DuplicatesCheck through self.run_command without any
    # additional command-line arguments (default comparison rules).
    command_class = DuplicatesCheck
    self.run_command(command_class)

def test_run_duplicatescheck_compare_stoichiometry(self):
    # Invoke DuplicatesCheck through self.run_command with the
    # --compare-stoichiometry flag as the only argument.
    extra_args = ['--compare-stoichiometry']
    self.run_command(DuplicatesCheck, extra_args)

def test_run_duplicatescheck_compare_direction(self):
    # Invoke DuplicatesCheck through self.run_command with the
    # --compare-direction flag as the only argument.
    extra_args = ['--compare-direction']
    self.run_command(DuplicatesCheck, extra_args)

def test_run_excelexport(self):
dest = tempfile.mkdtemp()
try:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
'psamm.commands': [
'chargecheck = psamm.commands.chargecheck:ChargeBalanceCommand',
'console = psamm.commands.console:ConsoleCommand',
'duplicatescheck = psamm.commands.duplicatescheck:DuplicatesCheck',
'excelexport = psamm.commands.excelexport:ExcelExportCommand',
'fastgapfill = psamm.commands.fastgapfill:FastGapFillCommand',
'fba = psamm.commands.fba:FluxBalanceCommand',
Expand Down

0 comments on commit e807aa4

Please sign in to comment.