In [1]:
# Set up paths, import the annotation modules, and load the reference data
import os
import pickle
import sys
# ---------------------------------------------------------------------------
# Path configuration
# ---------------------------------------------------------------------------
# Every location below is derived from one root so the paths cannot drift
# apart. The root defaults to the original author's checkout; set the
# AUTOMATE_ANNOTATION_DIR environment variable to run on another machine.
BASE_DIR = (os.environ.get('AUTOMATE_ANNOTATION_DIR')
            or '/Users/woosubs/Desktop/AutomateAnnotation/')

# Source checkout of the recommender. The empty last component keeps the
# trailing separator that the original hard-coded value had.
PROJ_DIR = os.path.join(BASE_DIR, "AnnotationRecommender", "")
MOD_DIR = os.path.join(PROJ_DIR, "annotation_recommender")
# NOTE(review): the imports in this cell use `from annotation_recommender import ...`,
# which normally needs the package's parent directory (PROJ_DIR) on sys.path rather
# than the package directory itself -- confirm MOD_DIR is the intended entry.
# Guarded so that re-running the cell does not append duplicate entries.
if MOD_DIR not in sys.path:
    sys.path.append(MOD_DIR)

# File name of the BioModels model loaded later in the notebook.
BIOMD_12 = 'BIOMD0000000012.xml'

# Reference-data directories, all under <BASE_DIR>/DATA.
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = os.path.join(DATA_DIR, "bigg")

# SBML file of the BiGG e_coli_core model used in the first examples.
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")


from annotation_recommender import species_annotation as sa
from annotation_recommender import reaction_annotation as ra
from annotation_recommender import constants as cn

# ChEBI reference data.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.

# ChEBI -> shortened formula
chebi_to_formula_path = os.path.join(CHEBI_DIR, 'chebi_shortened_formula_30apr2022.pickle')
with open(chebi_to_formula_path, 'rb') as pickle_file:
    ref_shortened_chebi_to_formula = pickle.load(pickle_file)

# shortened formula -> ChEBI
formula_to_chebi_path = os.path.join(CHEBI_DIR, 'shortened_formula_to_chebis_20jul2022.pickle')
with open(formula_to_chebi_path, 'rb') as pickle_file:
    ref_shortened_formula_to_chebi = pickle.load(pickle_file)

# ChEBI synonyms, plus a copy in which every synonym is lower-cased and
# de-duplicated per key.
chebi_synonyms_path = os.path.join(CHEBI_DIR, 'chebi_synonyms.pickle')
with open(chebi_synonyms_path, 'rb') as pickle_file:
    chebi_synonyms = pickle.load(pickle_file)
chebi_low_synonyms = {
    chebi_key: list(set(synonym.lower() for synonym in synonyms))
    for chebi_key, synonyms in chebi_synonyms.items()
}

# Rhea reference data (presumably KEGG -> Rhea lookups, judging by the file
# names -- confirm against the module that produced them).
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
kegg2rhea_master_path = os.path.join(RHEA_DIR, 'kegg2rhea_master.pickle')
with open(kegg2rhea_master_path, 'rb') as pickle_file:
    ref_kegg2rhea_master = pickle.load(pickle_file)

kegg2rhea_bi_path = os.path.join(RHEA_DIR, 'kegg2rhea_bi.pickle')
with open(kegg2rhea_bi_path, 'rb') as pickle_file:
    ref_kegg2rhea_bi = pickle.load(pickle_file)

# Reference matrix used by the algorithm; print its shape as a sanity check.
ref_mat_path = os.path.join(ALGO_DIR, 'binary_ref_df.pickle')
with open(ref_mat_path, 'rb') as pickle_file:
    ref_mat = pickle.load(pickle_file)
print(ref_mat.shape)

(13651, 3790)


In [2]:
# Build the species and reaction annotation objects from the same SBML file
# (the e_coli_core model referenced by ecoli_fpath).
specanot = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)
reacanot = ra.ReactionAnnotation(libsbml_fpath=ecoli_fpath)

In [3]:
# Print the `exist_annotation` attribute; per the output below it maps each
# reaction ID to a list of Rhea IDs (presumably the annotations already stored
# in the model -- confirm against ReactionAnnotation).
print(reacanot.exist_annotation)

{'R_PFK': ['RHEA:16112'], 'R_PFL': ['RHEA:11847'], 'R_PGK': ['RHEA:14804'], 'R_PGL': ['RHEA:12559'], 'R_ACALD': ['RHEA:23291'], 'R_AKGt2r': ['RHEA:29014'], 'R_PGM': ['RHEA:15904'], 'R_PIt2r': ['RHEA:29942'], 'R_ALCD2x': ['RHEA:25293'], 'R_ACKr': ['RHEA:11355'], 'R_PPC': ['RHEA:23075'], 'R_ACONTa': ['RHEA:10231'], 'R_ACONTb': ['RHEA:22147'], 'R_ATPM': ['RHEA:13068', 'RHEA:20855'], 'R_PPCK': ['RHEA:18620'], 'R_PPS': ['RHEA:11367'], 'R_ADK1': ['RHEA:12976'], 'R_AKGDH': ['RHEA:27789'], 'R_PTAr': ['RHEA:19524'], 'R_PYK': ['RHEA:18160'], 'R_RPE': ['RHEA:13680'], 'R_CS': ['RHEA:16848'], 'R_SUCCt2_2': ['RHEA:29306'], 'R_ENO': ['RHEA:10167'], 'R_SUCDi': ['RHEA:29190'], 'R_SUCOAS': ['RHEA:17664'], 'R_TALA': ['RHEA:17056'], 'R_TKT2': ['RHEA:27629'], 'R_TPI': ['RHEA:18588'], 'R_EX_ac_e': ['RHEA:27817'], 'R_EX_etoh_e': ['RHEA:35270'], 'R_EX_for_e': ['RHEA:29682'], 'R_EX_h_e': ['RHEA:34982'], 'R_EX_h2o_e': ['RHEA:29670'], 'R_EX_nh4_e': ['RHEA:28750'], 'R_EX_pi_e': ['RHEA:32826'], 'R_FBA': ['RHEA:147

In [4]:
# List the ID of every species in the model held by the species annotator.
print([val.getId() for val in specanot.model.getListOfSpecies()])

['M_glc__D_e', 'M_gln__L_c', 'M_gln__L_e', 'M_glu__L_c', 'M_glu__L_e', 'M_glx_c', 'M_h2o_c', 'M_h2o_e', 'M_h_c', 'M_h_e', 'M_icit_c', 'M_lac__D_c', 'M_lac__D_e', 'M_mal__L_c', 'M_mal__L_e', 'M_nad_c', 'M_nadh_c', 'M_nadp_c', 'M_nadph_c', 'M_nh4_c', 'M_13dpg_c', 'M_nh4_e', 'M_o2_c', 'M_2pg_c', 'M_o2_e', 'M_3pg_c', 'M_oaa_c', 'M_pep_c', 'M_6pgc_c', 'M_pi_c', 'M_6pgl_c', 'M_pi_e', 'M_ac_c', 'M_pyr_c', 'M_pyr_e', 'M_q8_c', 'M_q8h2_c', 'M_r5p_c', 'M_ru5p__D_c', 'M_ac_e', 'M_acald_c', 'M_s7p_c', 'M_acald_e', 'M_accoa_c', 'M_succ_c', 'M_succ_e', 'M_succoa_c', 'M_acon_C_c', 'M_xu5p__D_c', 'M_actp_c', 'M_adp_c', 'M_akg_c', 'M_akg_e', 'M_amp_c', 'M_atp_c', 'M_cit_c', 'M_co2_c', 'M_co2_e', 'M_coa_c', 'M_dhap_c', 'M_e4p_c', 'M_etoh_c', 'M_etoh_e', 'M_f6p_c', 'M_fdp_c', 'M_for_c', 'M_for_e', 'M_fru_e', 'M_fum_c', 'M_fum_e', 'M_g3p_c', 'M_g6p_c']


In [5]:
# Run name-based prediction for two species. Per the output below, the result
# is a dict keyed by species ID, each value holding 'match_score', 'chebi'
# and 'formula'.
candidates_pred_annotations = specanot.predictAnnotationByName(inp_spec_list=['M_glc__D_e', 'M_gln__L_c'])
# Bare last expression so the notebook displays the returned dict.
candidates_pred_annotations

{'M_glc__D_e': {'match_score': 1.0,
  'chebi': ['CHEBI:17634', 'CHEBI:4167', 'CHEBI:42758'],
  'formula': ['C6O6']},
 'M_gln__L_c': {'match_score': 1.0,
  'chebi': ['CHEBI:18050', 'CHEBI:58359', 'CHEBI:30011'],
  'formula': ['C5N2O3', 'C5N2O2']}}

In [6]:
# Print three attributes of the annotator; per the output below each is a
# dict keyed by the species IDs queried above, with the same values as the
# dict returned by predictAnnotationByName.
print(specanot.match_score)
print(specanot.chebi)
print(specanot.formula)

{'M_glc__D_e': 1.0, 'M_gln__L_c': 1.0}
{'M_glc__D_e': ['CHEBI:17634', 'CHEBI:4167', 'CHEBI:42758'], 'M_gln__L_c': ['CHEBI:18050', 'CHEBI:58359', 'CHEBI:30011']}
{'M_glc__D_e': ['C6O6'], 'M_gln__L_c': ['C5N2O3', 'C5N2O2']}


In [7]:
# NOTE: this rebinds `reacanot`, which until now referred to the e_coli_core
# model; from here on it refers to the BioModels file BIOMD_12, so re-running
# earlier cells that use `reacanot` afterwards will show results for this model.
reacanot = ra.ReactionAnnotation(libsbml_fpath=os.path.join(BIOMODEL_DIR, BIOMD_12))

In [8]:
# Print the `reac_dict_rhea` and `reac_dict_kegg` attributes of the
# BIOMD_12-based annotator; both print as empty dicts in the output below.
print(reacanot.reac_dict_rhea)
print(reacanot.reac_dict_kegg)

{}
{}


{'M_glc__D_e': {'match_score': 1.0,
  'chebi': ['CHEBI:17634', 'CHEBI:4167', 'CHEBI:42758'],
  'formula2chebi': {'C6O6': ['CHEBI:17634', 'CHEBI:4167', 'CHEBI:42758']},
  'formula': ['C6O6']}}