In [1]:
from pytfa.io.json import load_json_model
model = load_json_model('./../models/TFA_GEM_Recon3_Lehninger_Curated_prepared.json')

2025-01-30 12:05:42,861 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


In [2]:
# This is the models with the original annotation 
# TODO: Put the original annotation back into the TFA 
# (its lost becuse the TFA model was made in matTFA ... )
from cobra.io.json import load_json_model
recon3 = load_json_model('./../models/Recon3D.json')

In [3]:
# Add a defined medium 

medium_metabolites = {
    'EX_o2_e': -100,
    'EX_h2o_e': -100,
    'EX_pi_e': -100,
    'EX_nh4_e': -100,
    'EX_h_e': -100,
    'EX_so4_e': -100,
    'EX_hco3_e': -100,
    'EX_h2o2_e': -100,
    'EX_glc_D_e': -100,
    'EX_ac_e': -100,
    'EX_lac_L_e': -100,
    'EX_hdca_e': -100,
    'EX_lnlc_e': -100,
    'EX_ocdcea_e': -100,
    'EX_ala_L_e': -100,
    'EX_arg_L_e': -100,
    'EX_asn_L_e': -100,
    'EX_asp_L_e': -100,
    'EX_cys_L_e': -100,
    'EX_gln_L_e': -100,
    'EX_glu_L_e': -100,
    'EX_gly_e': -100,
    'EX_his_L_e': -100,
    'EX_ile_L_e': -100,
    'EX_leu_L_e': -100,
    'EX_lys_L_e': -100,
    'EX_met_L_e': -100,
    'EX_phe_L_e': -100,
    'EX_pro_L_e': -100,
    'EX_ser_L_e': -100,
    'EX_thr_L_e': -100,
    'EX_trp_L_e': -100,
    'EX_tyr_L_e': -100,
    'EX_val_L_e': -100,
    'EX_bhb_e': -100,
    'EX_acac_e': -100,
}

for m, b in medium_metabolites.items():
    model.reactions.get_by_id(m).lower_bound = b

# Also allow for cytostolic ATP to be produced
model.reactions.cyt_atp2adp.lower_bound = -100

In [4]:
# Read the met_label table 
import pandas as pd

met_labels = pd.read_excel('Met_labeling_metabolites.xlsx')

In [5]:
from metabolink.annotation import find_metabolites_from_annotation




def find_metabolites_from_annotation(id, model, id_type='kegg.compound'):
    """
    Find metabolites in a model based on a given annotation id

    Parameters
    ----------
    id : str
        The annotation id to search for
    model : cobra.Model
        The model to search in  
        
    """

    # Veryfi if id is a non empty string
    if id and isinstance(id, str):
        metabolites = [] 
        for met in model.metabolites:
            try:
                ids = met.annotation[id_type]
            except KeyError:
                continue

            # TODO fix HMDB old vs new
            if id_type == 'hmdb':
                # Compare only last 5 digits of HMDB ids
                if id[-5:] in [i[-5:] for i in ids]:
                    metabolites.append(met.id)

            # For all other id_types compare the full id
            else:
                if id in ids:
                    metabolites.append(met.id)

        if metabolites not in [[], None]:            
                return metabolites
        else:
                return []
    else:
        return []
    

# Try to dinf the metablie labels using KEGG and HMDB 
kegg_2_met = met_labels.apply(lambda x: find_metabolites_from_annotation(x['KEGG'], recon3, id_type='kegg.compound'), axis=1)
hmdb_2_met = met_labels.apply(lambda x: find_metabolites_from_annotation(x['HMDB'], recon3, id_type='hmdb'), axis=1)


In [6]:
metabolites = pd.concat([kegg_2_met , hmdb_2_met], axis=1, keys=['KEGG', 'HMDB'])

# Print the counts of non nan values  
print('KEGG:', kegg_2_met.count())
print('HMDB:', hmdb_2_met.count())

# Joun Kegg and HMDB columns using set logic
metabolites['metabolites'] = metabolites.apply(lambda x: set(x['KEGG']) | set(x['HMDB']) , axis=1)

KEGG: 23
HMDB: 23


In [7]:
met_labels['model_metabolites'] = metabolites['metabolites']

In [8]:
# Check only for the inputs 
defined_precursors = [ 'EX_glc_D_e', 'EX_ac_e', 'EX_lac_L_e', 'EX_hdca_e', 'EX_lnlc_e', 'EX_ocdcea_e', 
                      'EX_ala_L_e', 'EX_arg_L_e', 'EX_asn_L_e', 'EX_asp_L_e', 'EX_cys_L_e', 'EX_gln_L_e', 'EX_glu_L_e',
                      'EX_gly_e', 'EX_his_L_e', 'EX_ile_L_e', 'EX_leu_L_e', 'EX_lys_L_e', 'EX_met_L_e', 'EX_phe_L_e',
                     'EX_pro_L_e', 'EX_ser_L_e', 'EX_thr_L_e', 'EX_trp_L_e', 'EX_tyr_L_e', 'EX_val_L_e', 'EX_bhb_e', 'EX_acac_e']

In [9]:
from metabolink import find_precursor_sets

# find precursor sets for all metabolites found in RECON3
precursor_sets = {}
for i, row in met_labels.iterrows():
    # Check if the metabolite was found in the model
    if row['model_metabolites'] != set():
        try:
            # Find the metabolite sets accros compartments // bases on the identifiers
            metabolites = list(row['model_metabolites'])
            
            # Drop metablolites that are in the extracellular environment
            metabolites = [m for m in metabolites if not m.endswith('_e')]

            # Replace dobuble underscore with single underscore (map to TFA recon3 annotation)
            metabolites = [m.replace('__', '_') for m in metabolites]
            
            precursor_set = find_precursor_sets(metabolites, model, defined_precursors=defined_precursors, method='min')
            precursor_sets[i] = precursor_set
        except ValueError as e:
            print(f'Error for {metabolites}: {e}')


Metabolites ['3sala_c', '3sala_m'] can be simultanously produced 74.9


2025-01-30 12:08:29,606 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 12:10:07,083 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 12:10:35,821 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 12:10:35,822 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 12:12:04,622 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 12:12:04,623 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 12:12:04,833 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=3sala_m: 100%|██████████| 2/2 [02:02<00:00, 61.50s/it]


Metabolites ['5mta_c'] can be simultanously produced 55.263157894736786


2025-01-30 12:15:07,151 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 12:16:59,992 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 12:17:34,900 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 12:17:34,901 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 12:19:10,178 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 12:19:10,179 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 12:19:10,397 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=5mta_c: 100%|██████████| 1/1 [00:50<00:00, 50.83s/it]


Metabolites ['ach_n', 'ach_c'] can be simultanously produced 33.333333333333314


2025-01-30 12:21:11,061 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 12:22:49,209 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 12:23:22,599 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 12:23:22,600 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 12:25:04,473 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 12:25:04,474 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 12:25:04,714 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=ach_n: 100%|██████████| 2/2 [15:57<00:00, 478.85s/it]


Metabolites ['creat_c', 'creat_m'] can be simultanously produced 71.42857142857143


2025-01-30 12:41:55,844 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 12:43:30,987 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 12:44:01,344 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 12:44:01,345 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 12:45:40,034 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 12:45:40,035 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 12:45:40,266 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=creat_m: 100%|██████████| 2/2 [20:44<00:00, 622.42s/it]


Metabolites ['gthrd_c', 'gthrd_m', 'gthrd_r'] can be simultanously produced 60.0


2025-01-30 13:07:30,276 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 13:08:55,193 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 13:09:25,864 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 13:09:25,865 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 13:10:54,177 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 13:10:54,179 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 13:10:54,397 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=gthrd_r: 100%|██████████| 3/3 [02:46<00:00, 55.40s/it]


Metabolites ['g3pc_c'] can be simultanously produced 40.0


2025-01-30 13:14:41,405 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 13:16:16,075 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 13:16:46,367 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 13:16:46,368 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 13:18:25,290 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 13:18:25,292 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 13:18:25,523 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=g3pc_c: 100%|██████████| 1/1 [08:21<00:00, 501.18s/it]


Error for ['acrn_c', 'acrn_r', 'acrn_x', 'acrn_m']: Metabolites cannot be produced
Error for ['crn_m', 'crn_x', 'crn_c', 'crn_r']: Metabolites cannot be produced
Metabolites ['met_L_m', 'met_L_c'] can be simultanously produced 50.0


2025-01-30 13:27:44,978 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 13:29:20,096 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 13:29:48,806 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 13:29:48,807 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 13:31:34,138 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 13:31:34,139 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 13:31:34,363 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=met_L_m: 100%|██████████| 2/2 [01:02<00:00, 31.25s/it]


Metabolites ['HC00900_m', 'HC00900_c'] can be simultanously produced 195.83824106792306


2025-01-30 13:33:48,775 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 13:35:20,649 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 13:35:53,241 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 13:35:53,242 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 13:37:27,209 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 13:37:27,210 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 13:37:27,432 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=HC00900_m: 100%|██████████| 2/2 [14:44<00:00, 442.11s/it]


Metabolites ['C02712_m', 'C02712_c'] can be simultanously produced 50.0


2025-01-30 13:53:03,055 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 13:54:35,606 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 13:55:06,010 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 13:55:06,011 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 13:56:30,028 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 13:56:30,030 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 13:56:30,245 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=C02712_m: 100%|██████████| 2/2 [01:47<00:00, 53.62s/it]


Metabolites ['cholp_l', 'cholp_g', 'cholp_n', 'cholp_c'] can be simultanously produced 15.000000000000007


2025-01-30 13:59:22,828 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 14:00:54,605 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 14:01:25,292 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 14:01:25,293 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 14:02:51,329 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 14:02:51,330 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 14:02:51,551 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=cholp_n: 100%|██████████| 4/4 [32:58<00:00, 494.70s/it]


Metabolites ['ahcys_r', 'ahcys_c', 'ahcys_m'] can be simultanously produced 18.42105263157899


2025-01-30 14:36:46,909 - thermomodel_Recon3thermoCurated - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Timeout limit is 3600s


2025-01-30 14:38:23,703 - thermomodel_Recon3thermoCurated - INFO - # Model preparation starting...


Preparing sinks...


2025-01-30 14:38:54,694 - thermomodel_Recon3thermoCurated - INFO - # Model preparation done.
2025-01-30 14:38:54,696 - thermomodel_Recon3thermoCurated - INFO - # Model conversion starting...
2025-01-30 14:40:33,562 - thermomodel_Recon3thermoCurated - INFO - # Model conversion done.
2025-01-30 14:40:33,564 - thermomodel_Recon3thermoCurated - INFO - # Updating cobra_model variables...
2025-01-30 14:40:33,786 - thermomodel_Recon3thermoCurated - INFO - # cobra_model variables are up-to-date


Lumping method detected: min


met=ahcys_r: 100%|██████████| 3/3 [02:00<00:00, 40.32s/it]


In [10]:
from metabolink.io import extract_precursor_sets
table = extract_precursor_sets(precursor_sets)

  df.loc[k, list(p)] = True


ValueError: cannot reindex on an axis with duplicate labels

In [11]:
precursor_sets

{1: {'3sala_c': [defaultdict(int, {'cys_L_e': 1.4}),
   defaultdict(int, {'met_L_e': 83.961769})],
  '3sala_m': [defaultdict(int, {'cys_L_e': 100.0001}),
   defaultdict(int, {'met_L_e': 66.666733})]},
 2: {'5mta_c': [defaultdict(int, {'met_L_e': 15.060606})]},
 3: {'ach_c': [defaultdict(int, {'ser_L_e': 100.0001}),
   defaultdict(int, {'asn_L_e': 15.384615}),
   defaultdict(int, {'gln_L_e': 100.0001}),
   defaultdict(int, {'his_L_e': 100.0001}),
   defaultdict(int, {'glc_D_e': 4.0}),
   defaultdict(int, {'met_L_e': 30.638889}),
   defaultdict(int, {'thr_L_e': 100.0001}),
   defaultdict(int, {'gly_e': 100.0001}),
   defaultdict(int, {'lac_L_e': 26.0}),
   defaultdict(int, {'leu_L_e': 5.0}),
   defaultdict(int, {'pro_L_e': 100.0001}),
   defaultdict(int, {'cys_L_e': 7.05}),
   defaultdict(int, {'glu_L_e': 100.0001}),
   defaultdict(int, {'tyr_L_e': 75.0}),
   defaultdict(int, {'ile_L_e': 6.0}),
   defaultdict(int, {'lys_L_e': 100.0001}),
   defaultdict(int, {'ocdcea_e': 1.550186}),
   de

In [12]:
met_labels

Unnamed: 0,compound,formula,m/z,Met,validate,HMDB,KEGG,model_metabolites
0,3-Methylhistidine,C7H11N3O2,168.08,1,'5',HMDB0000479,C01152,{}
1,3-Sulfinoalanine,C3H7NO4S,152.0,3,'0',HMDB0000996,C00606,"{3sala_c, 3sala_m}"
2,5'-Methylthioadenosine,C11H15N5O3S,296.08,1,'1',HMDB0001173,C00170,"{5mta_c, 5mta_e}"
3,Acetylcholine,C7H15NO2,144.1,123,'0',HMDB0000895,C01996,"{ach_e, ach_c, ach_n}"
4,Anserine,C10H16N4O3,239.11,1,'0',HMDB0000194,C01262,{}
5,Asymmetric dimethylarginine,C8H18N4O2,201.14,12,'0',HMDB0001539,C03626,{}
6,Creatine,C4H9N3O2,130.06,1,'1',HMDB0000064,C00300,{}
7,Creatinine,C4H7N3O,112.05,1,'0',HMDB0000562,C00791,"{creat_e, creat_c, creat_m}"
8,Glutarylcarnitine,C12H21NO6,274.13,123,'0',HMDB0013130,,{}
9,Glutathione,C10H17N3O6S,306.08,3,'10',HMDB0000125,C00051,"{gthrd_c, gthrd_m, gthrd_e, gthrd_r}"
