In [None]:
# Install the third-party packages used by this workflow (one install command per package).
pip install requests
pip install lxml
pip install pandas
pip install openpyxl
pip install cobra

# Windows only: first download the pyeda wheel from https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda
# and install it manually (presumably because modelseedpy needs pyeda -- TODO confirm), e.g.:
# pip install pyeda-0.28.0-cp37-cp37m-win_amd64.whl
pip install modelseedpy


# Build the model from ModelSEED

## Automatically build a draft model from the genome with ModelSEEDpy

In [None]:
import modelseedpy
from modelseedpy import MSBuilder, MSGenome

# Build a draft metabolic model directly from the protein FASTA of the genome.
org_full_name = 'Methylocystis_bryophila'
# download genome from NCBI https://www.ncbi.nlm.nih.gov/data-hub/genome/GCA_027925445.1/
genome = MSGenome.from_fasta('GCA_0279254451_ASM2792544v1_protein.faa',split=' ')
driftmodel = MSBuilder.build_metabolic_model(org_full_name, genome, classic_biomass=True)

# Alternatively, build this model with KBase.
# (Kept as a real comment: a bare string as the last expression of a cell
# would be echoed by Jupyter as the cell's output.)

## Import the draft model (`drift_model`) and initialize the new model

In [None]:
# Short organism code (used as file/folder prefix) and full organism name (used as model name).
org_name, org_full_name = 'mko', 'Methylomonas_koyamae'

In [1]:
# Short organism code (used as file/folder prefix) and full organism name (used as model name).
org_name, org_full_name = 'mbry', 'Methylocystis_bryophila'

In [2]:
import cobra

# Earlier input file, kept for reference:
#drift_model_path = org_name+'_model\\ModelSeed_model_DSMZ_21852.xml'

# The SBML file is expected inside the "<org_name>_model" folder.
drift_model_path = ''.join([org_name, '_model\\ModelSeed_model_', org_full_name, '.xml'])
drift_model = cobra.io.read_sbml_model(drift_model_path)

# Start from an empty model named after the organism; later cells fill it in.
new_model = cobra.Model(org_full_name)

new_model

0,1
Name,Methylocystis_bryophila
Memory address,26e69b3fb20
Number of metabolites,0
Number of reactions,0
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,


## Map KEGG reaction IDs to ModelSEED reaction IDs

In [3]:
import pandas as pd

#org_name = 'mbry'

# Inputs: the organism's "<org_name>_clean_kegg_reaction.xlsx" sheet and the
# tab-separated KEGG-ID / ModelSEED-ID lookup table.
df_reaction = pd.read_excel(
    org_name+'_model\\'+org_name+'_clean_kegg_reaction.xlsx',
    index_col=1,
)
titlename= ['entry_name','pathway_title','reaction_type','reaction_substrates','reaction_products']
df_keggID_modelseedID = pd.read_csv('reaction_keggID_modelseedID.txt', sep='\t')

# Left join keeps every KEGG reaction; only the first ModelSEED match per
# Kegg_ID is retained, and the columns are put in a fixed order.
mapping_reaction = (
    df_reaction
    .merge(df_keggID_modelseedID, how='left', on='Kegg_ID')
    .drop_duplicates(subset=['Kegg_ID'], keep='first')
    [['ModelSeed_ID','Kegg_ID']+titlename]
)
mapping_reaction.to_excel(
    org_name+'_model\\'+org_name+'_mapping_reaction.xlsx',
    sheet_name='Sheet1',
    header=True,
)

print('done!')

done!


## Add reactions from the draft model (`drift_model`)

In [None]:
import cobra
import pandas as pd
# To run this cell on its own, rebuild its inputs first, e.g.:
# org_name = 'mbry'
# org_full_name = 'Methylocystis_bryophila'
# drift_model_path = org_name+'_model\\ModelSeed_model_'+org_full_name+'.xml'
# drift_model = cobra.io.read_sbml_model(drift_model_path)
# new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_blank.xml')

mapping_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', index_col=0)
modelseed_id_list = list(mapping_reaction['ModelSeed_ID'].dropna(axis = 0, how = 'any'))  # reaction ID that need to be added

# Copy every mapped reaction that exists in the drift model (as "<id>_c0");
# IDs that cannot be copied are collected so they can be added from the
# ModelSEED database afterwards.
added_rxn_id_list = []
unadded_rxn_id_list = []
for rxn_id in modelseed_id_list:
    try:
        reaction = drift_model.reactions.get_by_id(rxn_id+'_c0')
        new_model.add_reactions([reaction])
    except Exception:
        # Deliberately best-effort, but no longer a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the cell.
        unadded_rxn_id_list.append(rxn_id)
        continue
    added_rxn_id_list.append(rxn_id)

# Disabled: copying the biomass reaction and setting it as the objective.
# bioreaction = drift_model.reactions.get_by_id('bio1_biomass')
# new_model.add_reactions([bioreaction])
# new_model.objective = new_model.problem.Objective(bioreaction.flux_expression, direction='max')

# The biomass step above is disabled, so the summary only mentions reactions.
print(str(len(added_rxn_id_list)) + ' / ' + str(len(modelseed_id_list)) + ' reactions are added, which are copied from drift model')


#cobra.io.write_sbml_model(new_model,filename=org_name+'_copyfrom_DriftModel.xml')

In [23]:
# Inspect the ModelSEED reaction IDs that could not be copied from drift_model.
unadded_rxn_id_list

['rxn40277',
 'rxn40276',
 'rxn40037',
 'rxn40273',
 'rxn40254',
 'rxn40065',
 'rxn39352',
 'rxn40102',
 'rxn40015',
 'rxn40016',
 'rxn40018',
 'rxn40019',
 'rxn40020',
 'rxn40014',
 'rxn39964',
 'rxn40041',
 'rxn40040']

### Add reactions not included in the draft model (`drift_model`)

In [17]:
import cobra
import pandas as pd

import modelseedpy
# NOTE(review): machine-specific absolute path to a local ModelSEEDDatabase checkout.
modelseed_path = 'C:\\Users\\vickenlee\\ModelSEEDDatabase'
modelseed = modelseedpy.biochem.from_local(modelseed_path)


print('======================== \n %d reactions begin to added \n'%len(unadded_rxn_id_list))

# reaction_type value in the mapping table -> direction keyword of add_ms_reaction.
direction_by_type = {'reversible': 'reversible', 'irreversible': 'forward'}

i,j = 0,0  # i: reactions added, j: reactions that failed
for rxn_id in unadded_rxn_id_list:
    reaction_type = mapping_reaction.query('ModelSeed_ID == "'+ rxn_id +'"')['reaction_type'].values[0]
    direction = direction_by_type.get(reaction_type)
    if direction is None:
        # Previously `direction` stayed unbound (first iteration) or silently
        # kept the previous reaction's value. Fall back to add_ms_reaction's
        # own default and make the assumption visible.
        direction = 'forward'
        print(rxn_id+' has unknown reaction_type %r, assuming forward'%(reaction_type,))
    try:
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
        print(rxn_id+' added')
        i+=1
    except Exception as err:
        # Still best-effort, but report why and let KeyboardInterrupt through.
        print(rxn_id+' added wrong !!!')
        print('    '+repr(err))
        j+=1

print('\n %d / %d reactions added successfully \n ====================='%(i,i+j))

# Fill in formula and charge for every metabolite whose ID contains a "cpd..." part.
for metabolite in new_model.metabolites:
    metaid = [meta for meta in metabolite.id.split('_') if meta.startswith('cpd')]
    if metaid:
        ms_metabolite = modelseed.get_seed_compound(metaid[0])
        # Self-comparison is False only for NaN, so NaN formulas are skipped.
        if  ms_metabolite.formula == ms_metabolite.formula:
            metabolite.formula = ms_metabolite.formula
        metabolite.charge = ms_metabolite.data['charge']

# add gene
# Read the mapping from the "<org_name>_model" folder, which is where the
# mapping cell writes it and where the other cells read it from.
mapping_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', index_col=0)
n_reactions = len(new_model.reactions)
for n_done, reaction in enumerate(new_model.reactions, start=1):
    rxnid = [rxn for rxn in reaction.id.split('_') if rxn.startswith('rxn')]
    if rxnid:
        # Space-separated gene entries become an OR rule: "( g1 or g2 )".
        gene_list_str = '( '+mapping_reaction.query('ModelSeed_ID == "'+ rxnid[0] +'"')['entry_name'].values[0].replace(' ', ' or ')+' )'
        reaction.gene_reaction_rule = gene_list_str
    if n_done%100 == 0:
        print('%d / %d reactions added succesfully'%(n_done,n_reactions))
print('%d / %d reactions added succesfully'%(n_reactions,n_reactions))




cobra.io.write_sbml_model(new_model,filename=org_name+'_copyfrom_DriftModel_and_add_from_ModelSEED.xml')

 433 reactions begin to added 

rxn15116 added
rxn00536 added
rxn15494 added
rxn15364 added
rxn15249 added
rxn15694 added
rxn15314 added
rxn06120 added
rxn15989 added
rxn13974 added
rxn00175 added
rxn15298 added
rxn00250 added
rxn06109 added
rxn15271 added
rxn29919 added
rxn00778 added
rxn08647 added
rxn03643 added
rxn16622 added
rxn00211 added
rxn01042 added
rxn15081 added
rxn15270 added
rxn02100 added
rxn01008 added
rxn07578 added
rxn07576 added
rxn07577 added
rxn05336 added
rxn05342 added
rxn05340 added
rxn05338 added
rxn05341 added
rxn05337 added
rxn05339 added
rxn00533 added
rxn08766 added
rxn05322 added
rxn05326 added
rxn05325 added
rxn05327 added
rxn05324 added
rxn05323 added
rxn05328 added
rxn07455 added
rxn00947 added
rxn06237 added
rxn05994 added
rxn06238 added
rxn05830 added
rxn17757 added
rxn05996 added
rxn05871 added
rxn14029 added
rxn11642 added
rxn11643 added
rxn16584 added
rxn07335 added
rxn05004 added
rxn16585 added
rxn11156 added
rxn11994 added
rxn40277 added
rxn40276

## Add reactions from the ModelSEED database

In [4]:
def add_ms_reaction(
    model,
    rxn_id,
    modelseed,
    compartment = 'c0',
    direction="forward",
):  # Xinli modified from modelseedpy.core.mseditorapi
    """Build a cobra reaction from a ModelSEED database entry and add it to ``model``.

    Parameters
    ----------
    model
        Target model; only its ``add_reactions`` method is used.
    rxn_id : str
        ModelSEED reaction ID, passed to ``modelseed.get_seed_reaction``.
    modelseed
        Biochemistry database providing ``get_seed_reaction`` and
        ``get_seed_compound``.
    compartment : str
        Suffix appended to the reaction ID and name. It does NOT set the
        metabolite compartments: those come from the compartment index stored
        with each compound in the reaction stoichiometry (0 -> 'c0', 1 -> 'e0').
    direction : str
        'reversible' sets the lower bound to -1000; 'backward' sets the bounds
        to [-1000, 0]; any other value (including the default 'forward')
        leaves the bounds of the freshly created ``cobra.Reaction`` untouched.

    A compound with any other compartment index gets an empty compartment
    suffix and a warning is printed.
    """
    modelseed_reaction = modelseed.get_seed_reaction(rxn_id)
    reaction_stoich = modelseed_reaction.cstoichiometry
    cobra_reaction = cobra.Reaction(rxn_id+'_'+ compartment)
    cobra_reaction.name = str(modelseed_reaction.data["name"])+'_'+compartment

    # Compartment index used in the ModelSEED stoichiometry -> model compartment.
    compartment_by_index = {0: 'c0', 1: 'e0'}

    metabolites_to_add = {}
    for metabolite, stoich in reaction_stoich.items():
        # `metabolite` is indexed as (compound ID, compartment index).
        compound_id = metabolite[0]
        compound = modelseed.get_seed_compound(compound_id).data
        compartment_string = compartment_by_index.get(int(metabolite[1]))
        if compartment_string is None:
            compartment_string = ''
            print(str(compound_id)+' compartment wrong')

        new_metabolite = cobra.Metabolite(
            compound_id+'_'+compartment_string,
            name=compound["name"]+'_'+compartment_string,
            compartment=compartment_string,
            # formula = compound['formula']
        )
        metabolites_to_add[new_metabolite] = stoich
    cobra_reaction.add_metabolites(metabolites_to_add)

    if direction == "reversible":
        cobra_reaction.lower_bound = -1000
    elif direction == "backward":
        cobra_reaction.lower_bound = -1000
        cobra_reaction.upper_bound = 0

    model.add_reactions([cobra_reaction])

In [5]:
import cobra
import pandas as pd

mapping_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', index_col=0)
modelseed_id_list = list(mapping_reaction['ModelSeed_ID'].dropna(axis = 0, how = 'any'))  # reaction ID that need to be added


import modelseedpy
# NOTE(review): machine-specific absolute path to a local ModelSEEDDatabase checkout.
modelseed_path = 'C:\\Users\\vickenlee\\ModelSEEDDatabase'
modelseed = modelseedpy.biochem.from_local(modelseed_path)

print('======================== \n %d reactions begin to added \n'%len(modelseed_id_list))

# reaction_type value in the mapping table -> direction keyword of add_ms_reaction.
direction_by_type = {'reversible': 'reversible', 'irreversible': 'forward'}

i,j = 0,0  # i: reactions added, j: reactions that failed
for rxn_id in modelseed_id_list:
    reaction_type = mapping_reaction.query('ModelSeed_ID == "'+ rxn_id +'"')['reaction_type'].values[0]
    direction = direction_by_type.get(reaction_type)
    if direction is None:
        # Previously `direction` stayed unbound (first iteration) or silently
        # kept the previous reaction's value. Fall back to add_ms_reaction's
        # own default and make the assumption visible.
        direction = 'forward'
        print(rxn_id+' has unknown reaction_type %r, assuming forward'%(reaction_type,))
    try:
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
        print(rxn_id+' added')
        i+=1
    except Exception as err:
        # Still best-effort, but report why and let KeyboardInterrupt through.
        print(rxn_id+' added wrong !!!')
        print('    '+repr(err))
        j+=1

print('\n %d / %d reactions added successfully \n ====================='%(i,i+j))

# Fill in formula and charge for every metabolite whose ID contains a "cpd..." part.
for metabolite in new_model.metabolites:
    metaid = [meta for meta in metabolite.id.split('_') if meta.startswith('cpd')]
    if metaid:
        ms_metabolite = modelseed.get_seed_compound(metaid[0])
        # Self-comparison is False only for NaN, so NaN formulas are skipped.
        if  ms_metabolite.formula == ms_metabolite.formula:
            metabolite.formula = ms_metabolite.formula
        metabolite.charge = ms_metabolite.data['charge']

# add gene
print('======================== \n genes of %d reactions begin to added \n'%i)
# `mapping_reaction` loaded at the top of this cell is reused here. It used to
# be re-read from org_name+'_mapping_reaction.xlsx' (without the
# "<org_name>_model" folder), a location the mapping cell does not write to.
gene_prefix = org_name+':'  # organism prefix stripped from gene entries (was hard-coded as 'mbry:')
n_reactions = len(new_model.reactions)
for n_done, reaction in enumerate(new_model.reactions, start=1):
    rxnid = [rxn for rxn in reaction.id.split('_') if rxn.startswith('rxn')]
    if rxnid:
        # Space-separated gene entries become an OR rule: "( g1 or g2 )".
        gene_list_str = '( '+mapping_reaction.query('ModelSeed_ID == "'+ rxnid[0] +'"')['entry_name'].values[0].replace(' ', ' or ').replace(gene_prefix, '')+' )'
        reaction.gene_reaction_rule = gene_list_str
    if n_done%100 == 0:
        print('%d / %d reactions genes added succesfully'%(n_done,n_reactions))
print('%d / %d reactions genes added succesfully'%(n_reactions,n_reactions))



cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_c0_from_modelseed.xml')

 920 reactions begin to added 

rxn15116 added
rxn00506 added
rxn00536 added
rxn00543 added
rxn00011 added
rxn02342 added
rxn01871 added
rxn00148 added
rxn00459 added
rxn01106 added
rxn00781 added
rxn00747 added
rxn15493 added
rxn15364 added
rxn00704 added
rxn02380 added
rxn01169 added
rxn01977 added
rxn15249 added
rxn15694 added
rxn01100 added
rxn15989 added
rxn00247 added
rxn00175 added
rxn00507 added
rxn15298 added
rxn00151 added
rxn00147 added
rxn00441 added
rxn02376 added
rxn01872 added
rxn00285 added
rxn00199 added
rxn01387 added
rxn00799 added
rxn01388 added
rxn00974 added
rxn00256 added
rxn00248 added
rxn06109 added
rxn00770 added
rxn01200 added
rxn00777 added
rxn01116 added
rxn15271 added
rxn01115 added
rxn01476 added
rxn01975 added
rxn29919 added
rxn00778 added
rxn08647 added
rxn01187 added
rxn11040 added
rxn00213 added
rxn00211 added
rxn06201 added
rxn15081 added
rxn15270 added
rxn01329 added
rxn00642 added
rxn00641 added
rxn01870 added
rxn02438 added
rxn02314 added
rxn00214

In [21]:
# Bring the biomass reaction over from the drift model and maximize its flux.
bioreaction = drift_model.reactions.get_by_id('bio1_biomass')
new_model.add_reactions([bioreaction])
new_model.objective = new_model.problem.Objective(
    bioreaction.flux_expression,
    direction='max',
)

# Reactions whose ID starts with "EX" are copied over as well.
exreaction = list(filter(lambda rxn: rxn.id.startswith('EX'), drift_model.reactions))
new_model.add_reactions(exreaction)

cobra.io.write_sbml_model(
    new_model,
    filename=org_name+'_model\\'+org_name+'_add_from_modelseed.xml',
)

In [22]:
# Display the summary of the assembled model.
new_model

0,1
Name,Methylomonas_koyamae
Memory address,126d8a9c130
Number of metabolites,1006
Number of reactions,871
Number of genes,0
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"c0, e0"


## Check mass and charge balance

In [26]:
# One tab-separated row per metabolite: id, name, formula (left blank when
# the metabolite has no formula).
# NOTE(review): this iterates `driftmodel` (built with MSBuilder), not the
# `drift_model` loaded from SBML -- confirm that is the intended object.
for me in driftmodel.metabolites:
    print(me.id+'\t'+me.name+'\t'+(me.formula if me.formula else ''))

cpd00443_c0	ABEE_c0	C7H6NO2
cpd02920_c0	2-Amino-4-hydroxy-6-hydroxymethyl-7-8-dihydropteridinediphosphate_c0	C7H9N5O8P2
cpd00012_c0	PPi_c0	HO7P2
cpd00067_c0	H+_c0	H
cpd00683_c0	Dihydropteroate_c0	C14H13N6O3
cpd00002_c0	ATP_c0	C10H13N5O13P3
cpd00033_c0	Glycine_c0	C2H5NO2
cpd00506_c0	gamma-Glutamylcysteine_c0	C8H13N2O5S
cpd00008_c0	ADP_c0	C10H13N5O10P2
cpd00009_c0	Phosphate_c0	HO4P
cpd00042_c0	GSH_c0	C10H16N3O6S
cpd00213_c0	Lipoamide_c0	C8H15NOS2
cpd14700_c0	2-Methyl-1-hydroxypropyl-TPP_c0	C16H25N4O8P2S
cpd00056_c0	TPP_c0	C12H17N4O7P2S
cpd02700_c0	S-(2-Methylpropionyl)-dihydrolipoamide_c0	C12H23NO2S2
cpd00114_c0	IMP_c0	C10H11N4O8P
cpd00103_c0	PRPP_c0	C5H9O14P3
cpd00226_c0	HYXN_c0	C5H4N4O
cpd00022_c0	Acetyl-CoA_c0	C23H34N7O17P3S
cpd00054_c0	L-Serine_c0	C3H7NO3
cpd00010_c0	CoA_c0	C21H32N7O16P3S
cpd00722_c0	O-Acetyl-L-serine_c0	C5H9NO4
cpd00046_c0	CMP_c0	C9H12N3O8P
cpd00096_c0	CDP_c0	C9H13N3O11P2
cpd00067_e0	H+_e0	H
cpd00106_e0	Fumarate_e0	C4H2O4
cpd00106_c0	Fumarate_c0	C4H2O4
cpd00037_c0	U

In [23]:
def check_balance(reaction,H_metabolite=None):
    """Report whether ``reaction`` is balanced, optionally repairing it with H+.

    ``reaction.check_mass_balance()`` is expected to return a mapping of
    imbalances (empty when balanced). Unbalanced reactions are printed. When
    ``H_metabolite`` is given and the 'H' imbalance equals the 'charge'
    imbalance, that amount of ``H_metabolite`` is added to the reaction to
    cancel it and the reaction is checked again.

    Parameters
    ----------
    reaction
        Object with ``id``, ``reaction``, ``check_mass_balance()`` and
        ``add_metabolites()`` (a cobra reaction in this notebook).
    H_metabolite
        Metabolite used for the repair; ``None`` (default) disables repair,
        so the reaction is only reported.

    Returns
    -------
    int
        1 if the reaction is balanced (possibly after adding H), else 0.
        Errors raised while checking are reported and also yield 0.
    """
    try:
        feedback = reaction.check_mass_balance()
        if not feedback:
            #print(reaction.id + ': banlance')
            #print('      '+ reaction.reaction)
            return 1

        print(reaction.id + ': ' + str(feedback))
        print('      '+ reaction.reaction)

        # Only an H imbalance that is matched exactly by the charge imbalance
        # can be cancelled by adding/removing H_metabolite.
        if H_metabolite and 'H' in feedback and feedback.get('H') == feedback.get('charge'):
            reaction.add_metabolites({H_metabolite: -1*feedback.get('H')})
            print('      H is added')
            return check_balance(reaction,H_metabolite)
        return 0
    except Exception:
        # Best-effort reporting, but no longer a bare `except:` so that
        # KeyboardInterrupt / SystemExit still interrupt a long scan.
        print(reaction.id + ': error')
        print('      '+ reaction.reaction)
        return 0



import cobra
import pandas as pd

# Metabolite handed to check_balance for repairing H/charge imbalances.
H_metabolite = new_model.metabolites.get_by_id('cpd00067_c0')

# check_balance returns 1 for a balanced reaction and 0 otherwise, so the
# running total is the number of balanced reactions.
i = 0
for reaction in new_model.reactions:
    i += check_balance(reaction, H_metabolite)
print('After add H+, %d / %d reactions are balanced' % (i, len(new_model.reactions)))

#cobra.io.write_sbml_model(new_model,filename=org_name+'_c0_balanced.xml')

rxn15314_c0: {'charge': -2.0, 'H': -1.0, 'O': 3.0, 'P': 1.0}
      cpd19001_c0 --> cpd19006_c0
rxn06120_c0: {'charge': -2.0, 'H': -1.0, 'O': 3.0, 'P': 1.0}
      cpd00190_c0 --> cpd00863_c0
rxn15989_c0: {'H': -2.0}
      cpd00363_c0 + 2.0 cpd19499_c0 <=> cpd00071_c0 + 2.0 cpd19500_c0
rxn13974_c0: {'charge': 2.0}
      cpd00011_c0 + cpd00022_c0 + cpd00067_c0 + 2.0 cpd11620_c0 <=> cpd00010_c0 + cpd00020_c0 + 2.0 cpd11621_c0
rxn07578_c0: {'charge': -2.0, 'C': 11.0, 'H': 21.0, 'O': 7.0, 'N': 2.0, 'P': 1.0}
      cpd14939_c0 <=> cpd00001_c0 + cpd14940_c0
rxn07576_c0: {'charge': -1.0}
      cpd00067_c0 + cpd11476_c0 + cpd11492_c0 --> cpd00011_c0 + cpd11493_c0 + cpd14938_c0
rxn07577_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0}
      cpd00005_c0 + cpd00067_c0 + cpd14938_c0 <=> cpd00006_c0 + cpd14939_c0
rxn05350_c0: {'charge': -1.0}
      cpd00067_c0 + cpd11472_c0 + cpd11492_c0 --> cpd00011_c0 + cpd11490_c0 + cpd11493_c0
rxn05336_c0: {'charge': -3.0}
      cpd000

In [24]:
# For every reaction that is still unbalanced, print a ModelSEED lookup URL
# followed by id / formula / charge of each participating metabolite.
for reaction in new_model.reactions:
    if check_balance(reaction) != 0:
        continue
    print('           https://modelseed.org/solr/reactions/select?wt=json&q=id:'+reaction.id.replace('_c0',''))
    for meta, sto in reaction.metabolites.items():
        print('    '.join(['           '+meta.id, str(meta.formula), str(meta.charge)]))


        #new_model.metabolites.get_by_id('C00042_c0').charge



rxn15314_c0: {'charge': -2.0, 'H': -1.0, 'O': 3.0, 'P': 1.0}
      cpd19001_c0 --> cpd19006_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn15314
           cpd19001_c0    C6H12O6    0
           cpd19006_c0    C6H11O9P    -2
rxn06120_c0: {'charge': -2.0, 'H': -1.0, 'O': 3.0, 'P': 1.0}
      cpd00190_c0 --> cpd00863_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn06120
           cpd00190_c0    C6H12O6    0
           cpd00863_c0    C6H11O9P    -2
rxn15989_c0: {'H': -2.0}
      cpd00363_c0 + 2.0 cpd19499_c0 <=> cpd00071_c0 + 2.0 cpd19500_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn15989
           cpd19499_c0    None    0
           cpd19500_c0    None    0
           cpd00363_c0    C2H6O    0
           cpd00071_c0    C2H4O    0
rxn13974_c0: {'charge': 2.0}
      cpd00011_c0 + cpd00022_c0 + cpd00067_c0 + 2.0 cpd11620_c0 <=> cpd00010_c0 + cpd00020_c0 + 2.0 cpd11621_c0
           https://modelseed.org/solr