In [1]:
# Setup: imports, paths, and reference data used by the species/reaction annotation classes
import numpy as np
import os
import pickle
import pandas as pd
import sys
# ---- Path configuration ----------------------------------------------------
# Every location below is derived from a single root so that moving the project
# means changing one value. Set the AUTOMATE_ANNOTATION_DIR environment variable
# before starting the kernel to point at another checkout; when it is unset (or
# empty) the original hard-coded location is used, so all defaults are unchanged.
BASE_DIR = (os.environ.get('AUTOMATE_ANNOTATION_DIR')
            or '/Users/woosubs/Desktop/AutomateAnnotation/')

# The trailing "" keeps the trailing separator the original PROJ_DIR literal had.
PROJ_DIR = os.path.join(BASE_DIR, "AnnotationRecommender", "")
MOD_DIR = os.path.join(PROJ_DIR, "annotation_recommender")
# Make the project importable. The guard keeps sys.path free of duplicate
# entries when this cell is re-run in the same kernel.
if MOD_DIR not in sys.path:
  sys.path.append(MOD_DIR)

# File name of the BioModels model used further down in the notebook.
BIOMD_12 = 'BIOMD0000000012.xml'

# Data directories, all located under BASE_DIR/DATA.
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
# SBML file passed to the annotators created in the next cell.
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")


from annotation_recommender import species_annotation as sa
from annotation_recommender import reaction_annotation as ra
from annotation_recommender import constants as cn

def _read_pickle(directory, filename):
  """Unpickle and return the object stored in `filename` inside `directory`."""
  with open(os.path.join(directory, filename), 'rb') as stream:
    return pickle.load(stream)

# chebi -> shortened formula
ref_shortened_chebi_to_formula = _read_pickle(
    CHEBI_DIR, 'chebi_shortened_formula_30apr2022.pickle')
# shortened formula -> chebi
ref_shortened_formula_to_chebi = _read_pickle(
    CHEBI_DIR, 'shortened_formula_to_chebis_20jul2022.pickle')

# Synonyms per key, plus a lower-cased and de-duplicated copy of each list.
chebi_synonyms = _read_pickle(CHEBI_DIR, 'chebi_synonyms.pickle')
chebi_low_synonyms = {
    one_key: list(set(val.lower() for val in chebi_synonyms[one_key]))
    for one_key in chebi_synonyms.keys()
}

# KEGG -> Rhea lookup tables ("master" and "bi" variants).
ref_kegg2rhea_master = _read_pickle(RHEA_DIR, 'kegg2rhea_master.pickle')
ref_kegg2rhea_bi = _read_pickle(RHEA_DIR, 'kegg2rhea_bi.pickle')

# Reference matrix; print its shape as a quick sanity check.
ref_mat = _read_pickle(ALGO_DIR, 'binary_ref_df.pickle')
print(ref_mat.shape)

(13651, 3790)


In [2]:
# Build one annotator of each kind from the same SBML file
# (ecoli_fpath points at e_coli_core.xml under BIGG_DIR, set in the first cell).
specanot = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)
reacanot = ra.ReactionAnnotation(libsbml_fpath=ecoli_fpath)

In [3]:
# Annotations already attached to the reactions (reaction ID -> list of Rhea
# terms in the saved output).
print(reacanot.exist_annotation)
# IDs of every species in the model held by the species annotator.
print([one_species.getId()
       for one_species in specanot.model.getListOfSpecies()])

{'R_PFK': ['RHEA:16112'], 'R_PFL': ['RHEA:11847'], 'R_PGK': ['RHEA:14804'], 'R_PGL': ['RHEA:12559'], 'R_ACALD': ['RHEA:23291'], 'R_AKGt2r': ['RHEA:29014'], 'R_PGM': ['RHEA:15904'], 'R_PIt2r': ['RHEA:29942'], 'R_ALCD2x': ['RHEA:25293'], 'R_ACKr': ['RHEA:11355'], 'R_PPC': ['RHEA:23075'], 'R_ACONTa': ['RHEA:10231'], 'R_ACONTb': ['RHEA:22147'], 'R_ATPM': ['RHEA:13068', 'RHEA:20855'], 'R_PPCK': ['RHEA:18620'], 'R_PPS': ['RHEA:11367'], 'R_ADK1': ['RHEA:12976'], 'R_AKGDH': ['RHEA:27789'], 'R_PTAr': ['RHEA:19524'], 'R_PYK': ['RHEA:18160'], 'R_RPE': ['RHEA:13680'], 'R_CS': ['RHEA:16848'], 'R_SUCCt2_2': ['RHEA:29306'], 'R_ENO': ['RHEA:10167'], 'R_SUCDi': ['RHEA:29190'], 'R_SUCOAS': ['RHEA:17664'], 'R_TALA': ['RHEA:17056'], 'R_TKT2': ['RHEA:27629'], 'R_TPI': ['RHEA:18588'], 'R_EX_ac_e': ['RHEA:27817'], 'R_EX_etoh_e': ['RHEA:35270'], 'R_EX_for_e': ['RHEA:29682'], 'R_EX_h_e': ['RHEA:34982'], 'R_EX_h2o_e': ['RHEA:29670'], 'R_EX_nh4_e': ['RHEA:28750'], 'R_EX_pi_e': ['RHEA:32826'], 'R_FBA': ['RHEA:147

In [4]:
# getReactionComponents is called in two ways for the same reaction.
# First with the reaction ID string:
one_comps = reacanot.getReactionComponents('R_PFK')
print(one_comps)
# Then with the reaction object returned by model.getReaction.
# In the saved run both calls print the same list of species IDs.
two_comps = reacanot.getReactionComponents(reacanot.model.getReaction('R_PFK'))
print(two_comps)

['M_adp_c', 'M_h_c', 'M_fdp_c', 'M_atp_c', 'M_f6p_c']
['M_adp_c', 'M_h_c', 'M_fdp_c', 'M_atp_c', 'M_f6p_c']


In [5]:
# Run name-based annotation prediction for the species of R_PFK only
# (one_comps comes from the getReactionComponents cell above).
# NOTE(review): this call presumably also fills specanot.match_score, .chebi and
# .formula, which the next cell prints -- confirm in species_annotation.
spec_pred_annotations = specanot.predictAnnotationByName(inp_spec_list=one_comps)

In [6]:
# Inspect the per-species results held on the annotator. In the saved output
# each attribute is a dict keyed by species ID.
# species ID -> match score (floats in the saved output)
print(specanot.match_score)
# species ID -> list of ChEBI IDs
print(specanot.chebi)
# species ID -> list of formula strings
print(specanot.formula)

{'M_adp_c': 0.3529411764705882, 'M_h_c': 1.0, 'M_fdp_c': 1.0, 'M_atp_c': 0.4117647058823529, 'M_f6p_c': 1.0}
{'M_adp_c': ['CHEBI:135736', 'CHEBI:456216', 'CHEBI:147398', 'CHEBI:151629', 'CHEBI:152534', 'CHEBI:153980', 'CHEBI:167004', 'CHEBI:167672', 'CHEBI:182955', 'CHEBI:28498', 'CHEBI:31899', 'CHEBI:32411', 'CHEBI:36331', 'CHEBI:42870', 'CHEBI:63450', 'CHEBI:63452', 'CHEBI:68840', 'CHEBI:72990', 'CHEBI:75998', 'CHEBI:77390', 'CHEBI:78443', 'CHEBI:88249', 'CHEBI:89713', 'CHEBI:90217', 'CHEBI:90304', 'CHEBI:90695', 'CHEBI:93296'], 'M_h_c': ['CHEBI:15378'], 'M_fdp_c': ['CHEBI:16905', 'CHEBI:49299'], 'M_atp_c': ['CHEBI:135736', 'CHEBI:182955'], 'M_f6p_c': ['CHEBI:15946', 'CHEBI:57579', 'CHEBI:61553', 'CHEBI:61527']}
{'M_adp_c': ['C19N2O2S', 'C19O9P', 'C17Cl2F3N7O2S', 'C21ClN3O2', 'C28N6OS', 'C35N4O4', 'C101N7O75', 'C18N4O11', 'C20N2O5S', 'C19O2', 'C26FIN5O4', 'C9N4O5', 'C10N5O10P2', 'C16NO6', 'C26N7O2S', 'C27O5', 'C14N2O', 'C18O2', 'C20O4', 'C12', 'C115N8O85', 'C18ClN2O6S2', 'C8NO6', 'C1

In [7]:
# Predict annotations for the single reaction R_PFK, passing in the
# species -> formulas dict from specanot and the reference matrix loaded in
# the first cell.
reac_pred_annotations = reacanot.predictAnnotation(inp_spec_dict=specanot.formula,
                                                   inp_reac_list=['R_PFK'],
                                                   inp_ref_mat=ref_mat)

In [8]:
# Bare expression so the notebook displays the value (0.8 in the saved run).
# NOTE(review): presumably set by the predictAnnotation call above -- confirm.
reacanot.sum_match_score

0.8

In [9]:
# Call updateSpeciesByAReaction for R_PFK with a chosen Rhea term (RHEA:12423),
# the species -> formulas dict and the reference matrix. The return value is
# displayed by the notebook; in the saved run it is a dict with a single entry,
# for M_atp_c.
# NOTE(review): whether this also modifies state on reacanot cannot be seen
# from this notebook -- check reaction_annotation.
reacanot.updateSpeciesByAReaction(inp_rid='R_PFK', inp_spec_dict=specanot.formula,
                                 inp_rhea='RHEA:12423', inp_ref_mat=ref_mat)

{'M_atp_c': ['CHEBI:30616']}

In [7]:
# Rebind reacanot to an annotator for BIOMD0000000012.xml (BIOMD_12) located in
# BIOMODEL_DIR. The e_coli_core-based instance created earlier is no longer
# reachable under this name after this line runs.
# NOTE(review): this cell is labelled In [7], a count already used above, so the
# saved outputs do not come from one top-to-bottom run -- re-run from a fresh
# kernel before trusting them.
reacanot = ra.ReactionAnnotation(libsbml_fpath=os.path.join(BIOMODEL_DIR, BIOMD_12))

In [8]:
# Show the two reaction dictionaries held on the annotator; both print as
# empty dicts in the saved output.
print(reacanot.reac_dict_rhea)
print(reacanot.reac_dict_kegg)

{}
{}


{'M_glc__D_e': {'match_score': 1.0,
  'chebi': ['CHEBI:17634', 'CHEBI:4167', 'CHEBI:42758'],
  'formula2chebi': {'C6O6': ['CHEBI:17634', 'CHEBI:4167', 'CHEBI:42758']},
  'formula': ['C6O6']}}