In [1]:
# Manually update species annotations and compare with the original results.
# Start with BioModels model 248 (BIOMD0000000248).

import libsbml
import os
import pickle
import sys

# Root of the local AutomateAnnotation checkout; every other path is derived from it.
# NOTE(review): machine-specific absolute path - adjust when running on another machine.
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
PROJ_DIR = os.path.join(BASE_DIR, "AnnotationRecommender/")
MOD_DIR = os.path.join(PROJ_DIR, "annotation_recommender")
# Make the project modules importable. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
if MOD_DIR not in sys.path:
    sys.path.append(MOD_DIR)
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR instead of repeating the absolute path a second time.
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

from annotation_recommender import species_annotation as sa
from annotation_recommender import reaction_annotation as ra
from annotation_recommender import constants as cn
from annotation_recommender import iterator as it
from annotation_recommender import tools

# Read the pickled reference matrix stored in the algorithm data folder.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
ref_mat_path = os.path.join(ALGO_DIR, 'binary_ref_df.pickle')
with open(ref_mat_path, 'rb') as pickle_file:
    ref_mat = pickle.load(pickle_file)
# Report the dimensions of the loaded matrix.
print(ref_mat.shape)

(13651, 3790)


In [2]:
# Create SpeciesAnnotation & ReactionAnnotation class instances
# Both objects are built from the same SBML file (BIOMD0000000248.xml) located
# in BIOMODEL_DIR; later cells use them as species_an and reaction_an.
biomd = 'BIOMD0000000248.xml'
biomd_fpath = os.path.join(BIOMODEL_DIR, biomd)
species_an = sa.SpeciesAnnotation(libsbml_fpath=biomd_fpath)
reaction_an = ra.ReactionAnnotation(libsbml_fpath=biomd_fpath)

In [3]:
# Identify species IDs to predict
# Pick one reaction by ID and ask the ReactionAnnotation object for the species
# it involves; `comps` is reused by the following cells.
one_rid ='CreatineKinase'
comps = reaction_an.getReactionComponents(one_rid)
print("Species associated with reaction %s:" % one_rid, comps)

Species associated with reaction CreatineKinase: ['ATP', 'Cr', 'ADP', 'PCr']


In [4]:
# Map every species ID of the reaction to the name stored in the SBML model.
orig_species = {
    species_id: species_an.model.getSpecies(species_id).name
    for species_id in comps
}
print(orig_species)
# Predict species annotations from those original (unmodified) names.
orig_spec_pred = species_an.predictAnnotationByName(
    specnames_dict=orig_species,
    update=True,
)

{'ATP': 'ATP', 'Cr': 'Cr', 'ADP': 'ADP', 'PCr': 'PCr'}


In [14]:
# Display the per-species match scores currently stored on species_an.
species_an.match_score

{'Cr': 1.0, 'ADP': 1.0, 'ATP': 1.0, 'PCr': 1.0}

In [74]:
# Check whether the literal label 'Cr' is one of the reference-matrix columns.
# NOTE(review): if the columns are chemical formulas, 'Cr' may denote chromium
# rather than creatine - confirm.
'Cr' in ref_mat.columns

True

In [15]:
# Correlation between two formula columns of the reference matrix
# (presumably ADP and ATP with hydrogens omitted - confirm).
first_column = ref_mat['C10N5O10P2']
second_column = ref_mat['C10N5O13P3']
first_column.corr(second_column)

0.720237034722825

In [7]:
# Correlation between the 'Cr' column and the 'C10N5O10P2' column of the
# reference matrix.
cr_column = ref_mat['Cr']
formula_column = ref_mat['C10N5O10P2']
cr_column.corr(formula_column)

-0.0031368368954884074

In [8]:
# Predict the annotation of the selected reaction from the species formulas
# currently held by species_an (derived from the original species names).
pred_reaction = reaction_an.predictAnnotation(
    inp_spec_dict=species_an.formula,
    inp_reac_list=[one_rid],
)
# Show the candidate annotations, then the match score of each candidate.
print(reaction_an.candidates)
print(reaction_an.match_score)

{'CreatineKinase': Index(['RHEA:11603', 'RHEA:12840', 'RHEA:23455', 'RHEA:24677', 'RHEA:29806',
       'RHEA:29818', 'RHEA:32786', 'RHEA:38586', 'RHEA:44347', 'RHEA:45083',
       'RHEA:59899', 'RHEA:66275'],
      dtype='object')}
{'CreatineKinase': {'RHEA:11603': 0.75, 'RHEA:12840': 0.6, 'RHEA:23455': 0.6, 'RHEA:24677': 0.6, 'RHEA:29806': 0.5, 'RHEA:29818': 0.5, 'RHEA:32786': 0.5, 'RHEA:38586': 0.5, 'RHEA:44347': 0.42857142857142855, 'RHEA:45083': 0.42857142857142855, 'RHEA:59899': 0.42857142857142855, 'RHEA:66275': 0.5}}


In [9]:
# Accuracy reported by the ReactionAnnotation object for its current prediction.
reaction_an.getAccuracy()

0.0

In [10]:
# Manually curated species names: the abbreviations 'Cr' and 'PCr' are spelled
# out, while 'ADP' and 'ATP' are kept as they are.
upd_species = {
    'Cr': 'Creatine',
    'ADP': 'ADP',
    'ATP': 'ATP',
    'PCr': 'Phosphocreatine',
}
# Re-run the name-based species prediction with the curated names.
upd_spec_pred = species_an.predictAnnotationByName(
    specnames_dict=upd_species,
    update=True,
)

In [11]:
# Accuracy reported by the SpeciesAnnotation object for its current prediction.
species_an.getAccuracy()

1.0

In [12]:
# Repeat the reaction prediction using the formulas now held by species_an
# (presumably refreshed by the preceding update=True call - confirm).
pred_reaction = reaction_an.predictAnnotation(
    inp_spec_dict=species_an.formula,
    inp_reac_list=[one_rid],
)
# Show the candidate annotations, then the match score of each candidate.
print(reaction_an.candidates)
print(reaction_an.match_score)

{'CreatineKinase': Index(['RHEA:17160'], dtype='object')}
{'CreatineKinase': {'RHEA:17160': 0.8}}


In [13]:
# Accuracy reported by the ReactionAnnotation object after the re-run prediction.
reaction_an.getAccuracy()

1.0

In [42]:
# For comparison with the model above, load BioModels model 691 (COPASI)
# directly with libsbml.
biomd = 'BIOMD0000000691.xml'
biomd_fpath = os.path.join(BIOMODEL_DIR, biomd)
reader = libsbml.SBMLReader()
document = reader.readSBML(biomd_fpath)
model = document.getModel()
# readSBML does not raise when the file is missing or invalid; it logs errors on
# the returned document and getModel() then yields None. Fail here with a clear
# message instead of an AttributeError in a later cell.
if model is None:
    raise ValueError(
        "Could not load an SBML model from %s (%d error(s) logged by libsbml)"
        % (biomd_fpath, document.getNumErrors())
    )

In [43]:
# List every species of the loaded model: first the IDs, then the names,
# separated by an empty line.
species_ids = [species.getId() for species in model.getListOfSpecies()]
species_names = [species.name for species in model.getListOfSpecies()]
print(species_ids)
print()
print(species_names)

['S1__Cell_1_', 'S1__Cell_2_', 'S2__Cell_1_', 'S2__Cell_2_', 'S3__Cell_1_', 'S3__Cell_2_', 'S4__Cell_1_', 'S4__Cell_2_', 'N2__Cell_1_', 'N2__Cell_2_', 'A3__Cell_1_', 'A3__Cell_2_', 'S4_ex', 'A', 'N', 'N1__Cell_1_', 'N1__Cell_2_', 'A2__Cell_1_', 'A2__Cell_2_']

['S1', 'S1', 'S2', 'S2', 'S3', 'S3', 'S4', 'S4', 'N2', 'N2', 'A3', 'A3', 'S4_ex', 'A', 'N', 'N1', 'N1', 'A2', 'A2']


In [51]:
# 'S1__Cell_1_' is a species of model 691, whereas the species_an object created
# earlier wraps BIOMD0000000248. Rebuild it from the currently selected file
# (biomd_fpath) so this cell works after Restart & Run All.
species_an = sa.SpeciesAnnotation(libsbml_fpath=biomd_fpath)
orig_spec_pred = species_an.predictAnnotationByName(inp_spec_list=['S1__Cell_1_'])

In [53]:
# Display the match score recorded for the species predicted in the previous cell.
species_an.match_score

{'S1__Cell_1_': 0.5}

In [55]:
# feedback on species names? asking user to be more descriptive (asking for hints)? (system-user interaction/dialogue)

# 0. 'Matcher' class: adjust so that both bqbiol:is and bqbiol:isVersionOf can be extracted
# 1. Collect information of results of both species & reactions
# 2. Update the algorithm: identify suspicious names (cases) using multi-correlation and/or match score
# 3. Create reports/JSON output; begin using a Python GUI tool