In [1]:
# Set up paths, import the annotation_recommender modules, and load the reference data
import numpy as np
import os
import pickle
import pandas as pd
import sys
# --- Path configuration ------------------------------------------------------
# Root folder holding both the AnnotationRecommender checkout and the DATA
# folder. The default is the original author's location; set the
# AUTOMATE_ANNOTATION_DIR environment variable to run on another machine.
BASE_DIR = os.environ.get("AUTOMATE_ANNOTATION_DIR",
                          '/Users/woosubs/Desktop/AutomateAnnotation/')
# The trailing "" keeps the trailing separator the original PROJ_DIR string had.
PROJ_DIR = os.path.join(BASE_DIR, "AnnotationRecommender", "")
MOD_DIR = os.path.join(PROJ_DIR, "annotation_recommender")
# Guarded so that re-running this cell does not keep growing sys.path.
# NOTE(review): the imports below use `from annotation_recommender import ...`,
# which normally needs the package's parent directory (PROJ_DIR) on sys.path
# rather than the package directory itself - confirm MOD_DIR is intended.
if MOD_DIR not in sys.path:
  sys.path.append(MOD_DIR)

# File name of one model (not used elsewhere in the visible cells).
BIOMD_12 = 'BIOMD0000000012.xml'

# Every data folder is derived from DATA_DIR so there is one root to change.
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")


from annotation_recommender import species_annotation as sa
from annotation_recommender import reaction_annotation as ra
from annotation_recommender import constants as cn
from annotation_recommender import iterator as it
from annotation_recommender import tools

def _load_pickle(dir_path, fname):
  """Return the object unpickled from file ``fname`` inside ``dir_path``.

  SECURITY: pickle.load can execute arbitrary code while loading, so only
  use this on reference files that come from a trusted source.
  """
  with open(os.path.join(dir_path, fname), 'rb') as handle:
    return pickle.load(handle)


# chebi to shortened formula
ref_shortened_chebi_to_formula = _load_pickle(
    CHEBI_DIR, 'chebi_shortened_formula_30apr2022.pickle')
# shortened formula to chebi
ref_shortened_formula_to_chebi = _load_pickle(
    CHEBI_DIR, 'shortened_formula_to_chebis_20jul2022.pickle')

chebi_synonyms = _load_pickle(CHEBI_DIR, 'chebi_synonyms.pickle')
# Same keys as chebi_synonyms; each value is the lower-cased synonyms with
# duplicates removed (list order is arbitrary because it comes from a set).
chebi_low_synonyms = {
    one_k: list({val.lower() for val in synonyms})
    for one_k, synonyms in chebi_synonyms.items()
}

ref_kegg2rhea_master = _load_pickle(RHEA_DIR, 'kegg2rhea_master.pickle')
ref_kegg2rhea_bi = _load_pickle(RHEA_DIR, 'kegg2rhea_bi.pickle')

# load reference matrix
ref_mat = _load_pickle(ALGO_DIR, 'binary_ref_df.pickle')
# check its shape
print(ref_mat.shape)

(13651, 3790)


In [2]:
# Build the species and reaction annotators from the same SBML file
# (ecoli_fpath, i.e. e_coli_core.xml).
specanot = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)
reacanot = ra.ReactionAnnotation(libsbml_fpath=ecoli_fpath)

# Species are predicted first; the reaction prediction below reads
# specanot.formula afterwards (presumably populated by this call - confirm).
spec_pred_annotations = specanot.predictAnnotationByName(inp_spec_list=None)

# Predict reaction annotations against the reference matrix loaded earlier.
reac_pred_annotations = reacanot.predictAnnotation(
    inp_spec_dict=specanot.formula,
    inp_reac_list=None,
    inp_ref_mat=ref_mat,
    update=True,
)

In [3]:
# Run the iterator on the two E. coli annotators with num_iter=10. The
# recorded output shows it quitting early once the match score stops increasing.
res = it.iterateAndGetUpdatedResults(
    spec_cl=specanot,
    reac_cl=reacanot,
    num_iter=10,
    show_message=True,
)

Initial match score: 82.94
*************************
Iteration 1
Updated match score: 89.97
*************************
Iteration 2
Updated match score: 90.17
*************************
Iteration 3
Updated match score: 90.17
Score not increasing. Quitting iteration...

Calculation finished.


In [4]:
# Report the summed match score stored in the result of the run above.
final_sum_match_score = res['sum_match_score']
print(final_sum_match_score)

90.175


## Evaluate BioModels

In [2]:
# Try one biomodel: resolve its path inside the curated BioModels folder.
one_biomd = 'BIOMD0000000691.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)

# Species and reaction annotators are both built from the same SBML file.
species_an = sa.SpeciesAnnotation(libsbml_fpath=one_biomd_fpath)
reaction_an = ra.ReactionAnnotation(libsbml_fpath=one_biomd_fpath)

# Predict species first, then reactions from species_an.formula.
# NOTE(review): unlike the E. coli cell above, inp_ref_mat and update are not
# passed here, so the library defaults apply - confirm that is intended.
pred_species = species_an.predictAnnotationByName()
pred_reaction = reaction_an.predictAnnotation(
    inp_spec_dict=species_an.formula,
)

In [3]:
# Run the same iterator on the single-BioModel annotators. This reassigns
# `res`, replacing the result of the earlier E. coli run.
res = it.iterateAndGetUpdatedResults(
    spec_cl=species_an,
    reac_cl=reaction_an,
    num_iter=10,
    show_message=True,
)

Initial match score: 7.89
*************************
Iteration 1
Updated match score: 8.52
*************************
Iteration 2
Updated match score: 8.52
Score not increasing. Quitting iteration...

Calculation finished.


In [None]:
# TODO: update the annotations using `res`, compute the accuracy, and repeat for all biomodels.

In [5]:
# List the fields available in the result returned by iterateAndGetUpdatedResults.
res.keys()

dict_keys(['candidates', 'spec_formula', 'spec_chebi2update', 'sum_match_score', 'rep'])

In [6]:
# Show the entry stored under 'candidates'. In the recorded output it maps IDs
# such as 'v1_cell_1' to pandas Index objects of RHEA terms
# (presumably keyed by reaction ID - confirm).
res['candidates']

{'v1_cell_1': Index(['RHEA:23559'], dtype='object'),
 'v1_cell_2': Index(['RHEA:23559'], dtype='object'),
 'v2_cell_1': Index(['RHEA:11255', 'RHEA:11931', 'RHEA:33314', 'RHEA:49819'], dtype='object'),
 'v2_cell_2': Index(['RHEA:11255', 'RHEA:11931', 'RHEA:33314', 'RHEA:49819'], dtype='object'),
 'v3_cell_1': Index(['RHEA:10931', 'RHEA:11227', 'RHEA:11635', 'RHEA:11999', 'RHEA:12680',
        'RHEA:13472', 'RHEA:13924', 'RHEA:14416', 'RHEA:16712', 'RHEA:17152',
        ...
        'RHEA:65527', 'RHEA:65595', 'RHEA:65599', 'RHEA:65947', 'RHEA:65951',
        'RHEA:66691', 'RHEA:66875', 'RHEA:66891', 'RHEA:66895', 'RHEA:67035'],
       dtype='object', length=193),
 'v3_cell_2': Index(['RHEA:10931', 'RHEA:11227', 'RHEA:11635', 'RHEA:11999', 'RHEA:12680',
        'RHEA:13472', 'RHEA:13924', 'RHEA:14416', 'RHEA:16712', 'RHEA:17152',
        ...
        'RHEA:65527', 'RHEA:65595', 'RHEA:65599', 'RHEA:65947', 'RHEA:65951',
        'RHEA:66691', 'RHEA:66875', 'RHEA:66891', 'RHEA:66895', 'RHEA:6