In [None]:
pip install requests
pip install lxml
pip install pandas
pip install openpyxl
pip install cobra
pip install cobrakbase
# for windows only, download the file from https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda firstly
# pip install pyeda-0.28.0-cp37-cp37m-win_amd64.whl  
pip install modelseedpy


cobrakbase 0.3.0


In [7]:
# Short organism code (used as file/directory prefix) and full organism name.
org_name, org_full_name = 'mko', 'Methylomonas_koyamae'

In [20]:
# Short organism code (used as file/directory prefix) and full organism name.
org_name, org_full_name = 'mbry', 'Methylocystis_bryophila'

# Build the model from ModelSEED

## Automatically build a draft model from the genome with ModelSEED

In [21]:
import modelseedpy
from modelseedpy import MSBuilder, MSGenome
import os
import cobra
os.environ["HOME"] = "1"
import cobrakbase

# download genome from NCBI https://www.ncbi.nlm.nih.gov/data-hub/genome/GCA_027925445.1/
# Locate protein.faa somewhere below "<org_full_name>_genome".
genome_dir = org_full_name + '_genome'
genome_path = None  # reset so a value left over from an earlier run is never reused
for root, _dirs, files in os.walk(genome_dir):
    if 'protein.faa' in files:
        # no break: like the original loop, the last match found wins
        genome_path = os.path.join(root, 'protein.faa')
if genome_path is None:
    raise FileNotFoundError("no 'protein.faa' found under %r" % genome_dir)

genome = MSGenome.from_fasta(genome_path, split=' ')

# KBaseAPI() needs KBase credentials; the recorded output of this cell shows
# "missing token value or ~/.kbase/token file" when none are configured.
kbase_api = cobrakbase.KBaseAPI()
kb_template = kbase_api.get_from_ws("GramNegModelTemplateV4", "NewKBaseModelTemplates")
driftmodel = MSBuilder.build_metabolic_model(org_full_name, genome, template=kb_template, allow_all_non_grp_reactions=True)

# Write the model into "<org_name>_model", creating the directory if needed.
model_dir = org_name + '_model'
os.makedirs(model_dir, exist_ok=True)
cobra.io.write_sbml_model(driftmodel, filename=os.path.join(model_dir, org_name + '_drift_model_from_modelseed.xml'))

# Alternatively, this model can be built in KBase.

# Display the model summary as the cell output.
driftmodel

Exception: missing token value or ~/.kbase/token file

In [22]:
from cobrakbase.core.kbasefba.newmodeltemplate_builder import NewModelTemplateBuilder
from modelseedpy.helpers import get_template, get_classifier
# Fetch the raw core and Gram-negative template dictionaries first, then turn
# each of them into a template object with NewModelTemplateBuilder.
raw_templates = {
    'core': get_template('template_core'),
    'gramneg': get_template('template_gram_neg'),
}
template_core = NewModelTemplateBuilder.from_dict(raw_templates['core'], None).build()
template_gramneg = NewModelTemplateBuilder.from_dict(raw_templates['gramneg'], None).build()



In [25]:
# Create the MSBuilder for the loaded genome using the Gram-negative template.
model_builder = MSBuilder(genome, template_gramneg)

In [26]:
# Build the base model from the genome/template held by model_builder.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'AttrDict' object has no attribute 'build_biomass'"; presumably the
# template objects are not of the type this modelseedpy version expects — TODO confirm.
base_model = model_builder.build('model', '0', allow_all_non_grp_reactions=True, annotate_with_rast=False)
from modelseedpy.core.msbuilder import build_biomass, core_atp
# Add a 'bio2' biomass reaction built from the core template and core_atp.
base_model.add_reactions([build_biomass('bio2', base_model, template_core, core_atp, '0')])

AttributeError: 'AttrDict' object has no attribute 'build_biomass'

In [None]:
import os
os.environ["HOME"] = "1"
import cobrakbase
# Build the model with the KBase Gram-negative template.
kbase_api = cobrakbase.KBaseAPI()
kb_template = kbase_api.get_from_ws("GramNegModelTemplateV4","NewKBaseModelTemplates")
# Use the organism configured for this notebook as the model id (as the other
# build cell does) instead of the left-over 'salmonella infantis' label.
model = MSBuilder.build_metabolic_model(org_full_name, genome, template = kb_template, allow_all_non_grp_reactions = True)

In [9]:
# Optimize the model held in `driftmodel`.
# NOTE(review): the recorded output is "KeyError: 'rxn05294_c0'"; the cause is not visible in this notebook.
solution = driftmodel.optimize()

KeyError: 'rxn05294_c0'

In [10]:
# Show the objective value of `solution`; fails with NameError when the optimize cell did not succeed.
solution.objective_value

NameError: name 'solution' is not defined

## Initialize new model

In [8]:
import cobra

# Start from an empty cobra model identified by the full organism name.
new_model = cobra.Model(org_full_name)
# Display the (still empty) model summary.
new_model

0,1
Name,Methylocystis_bryophila
Memory address,2bc64471340
Number of metabolites,0
Number of reactions,0
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,


## KEGG ID mapping to modelseed

In [3]:
import pandas as pd

import os

# Map the cleaned KEGG reactions to ModelSEED reaction IDs and save the result.
#org_name = 'mbry'
model_dir = org_name + '_model'  # joined with os.path.join so the paths are not Windows-only
df_reaction = pd.read_excel(os.path.join(model_dir, org_name + '_clean_kegg_reaction.xlsx'), index_col=1)
titlename = ['entry_name', 'pathway_title', 'reaction_type', 'reaction_substrates', 'reaction_products']
df_keggID_modelseedID = pd.read_csv('reaction_keggID_modelseedID.txt', sep='\t')

# Left join on the KEGG id, keep only the first ModelSEED match per KEGG
# reaction, and keep the two id columns followed by the descriptive columns.
mapping_reaction = (
    pd.merge(df_reaction, df_keggID_modelseedID, how='left', on='Kegg_ID')
    .drop_duplicates(subset=['Kegg_ID'], keep='first')
    [['ModelSeed_ID', 'Kegg_ID'] + titlename]
)
mapping_reaction.to_excel(os.path.join(model_dir, org_name + '_mapping_reaction.xlsx'), sheet_name='Sheet1', header=True)

print('done!')

done!


## Add reactions from the draft ("drift") model

In [None]:
import cobra
import pandas as pd
import os

# Copy every mapped ModelSEED reaction that exists in the draft model into new_model.
# `drift_model` and `new_model` must already exist. Disabled snippet to load them from disk:
# org_name = 'mbry'
# org_full_name = 'Methylocystis_bryophila'
# drift_model_path = org_name+'_model\\ModelSeed_model_'+org_full_name+'.xml'
# drift_model = cobra.io.read_sbml_model(drift_model_path)
# new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_blank.xml')

mapping_reaction = pd.read_excel(os.path.join(org_name + '_model', org_name + '_mapping_reaction.xlsx'), index_col=0)
modelseed_id_list = list(mapping_reaction['ModelSeed_ID'].dropna(axis = 0, how = 'any'))  # reaction ID that need to be added

added_rxn_id_list = []
unadded_rxn_id_list = []
for rxn_id in modelseed_id_list:
    try:
        reaction = drift_model.reactions.get_by_id(rxn_id + '_c0')
    except KeyError:
        # Only "reaction not in the draft model" is expected here; anything else
        # (e.g. drift_model not being defined) must surface instead of silently
        # putting every reaction on the unadded list.
        unadded_rxn_id_list.append(rxn_id)
        continue
    # Add a copy so that the reaction object stays attached to drift_model.
    new_model.add_reactions([reaction.copy()])
    added_rxn_id_list.append(rxn_id)

# Disabled snippet: also copy the biomass reaction and use it as objective.
# bioreaction = drift_model.reactions.get_by_id('bio1_biomass')
# new_model.add_reactions([bioreaction])
# new_model.objective = new_model.problem.Objective(bioreaction.flux_expression, direction='max')

print(str(len(added_rxn_id_list)) + ' / ' + str(len(modelseed_id_list)) + ' reactions and biomass reaction are added, which are copied from drift model')


#cobra.io.write_sbml_model(new_model,filename=org_name+'_copyfrom_DriftModel.xml')

In [23]:
# Display the ModelSEED reaction ids collected in unadded_rxn_id_list.
unadded_rxn_id_list

['rxn40277',
 'rxn40276',
 'rxn40037',
 'rxn40273',
 'rxn40254',
 'rxn40065',
 'rxn39352',
 'rxn40102',
 'rxn40015',
 'rxn40016',
 'rxn40018',
 'rxn40019',
 'rxn40020',
 'rxn40014',
 'rxn39964',
 'rxn40041',
 'rxn40040']

### Add reactions that are not included in the draft ("drift") model

In [17]:
import cobra
import pandas as pd

import modelseedpy
import os

# Location of the local ModelSEEDDatabase checkout. It can be overridden with the
# MODELSEED_DB_PATH environment variable; the original hard-coded path is the default.
modelseed_path = os.environ.get('MODELSEED_DB_PATH', 'C:\\Users\\vickenlee\\ModelSEEDDatabase')
modelseed = modelseedpy.biochem.from_local(modelseed_path)

# reaction_type label in the mapping table -> direction understood by add_ms_reaction
direction_by_type = {'reversible': 'reversible', 'irreversible': 'forward'}

print('======================== \n %d reactions begin to added \n'%len(unadded_rxn_id_list))

i,j = 0,0
for rxn_id in unadded_rxn_id_list:
    try:
        reaction_type = mapping_reaction.loc[mapping_reaction['ModelSeed_ID'] == rxn_id, 'reaction_type'].values[0]
        # An unknown reaction_type raises KeyError here instead of silently
        # reusing the direction of the previous reaction.
        direction = direction_by_type[reaction_type]
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
        print(rxn_id+' added')
        i+=1
    except Exception as exc:
        print(rxn_id+' added wrong !!!')
        # report why, so that failures can be diagnosed
        print('      %s: %s' % (type(exc).__name__, exc))
        j+=1

print('\n %d / %d reactions added successfully \n ====================='%(i,i+j))

# Copy formula and charge from the ModelSEED compound onto each model metabolite.
for metabolite in new_model.metabolites:
    metaid = [meta for meta in metabolite.id.split('_') if meta.startswith('cpd')]
    if metaid:
        ms_metabolite = modelseed.get_seed_compound(metaid[0])
        if ms_metabolite is None:
            continue
        # The original `formula == formula` test is only False for NaN, i.e. it
        # was a missing-value check; spell that out.
        if pd.notna(ms_metabolite.formula):
            metabolite.formula = ms_metabolite.formula
        metabolite.charge = ms_metabolite.data['charge']

# add gene
# The mapping table is read from "<org_name>_model", the directory every other
# cell of this notebook reads it from and writes it to.
mapping_reaction = pd.read_excel(os.path.join(org_name + '_model', org_name + '_mapping_reaction.xlsx'), index_col=0)
i=1
for reaction in new_model.reactions:
    rxnid = [rxn for rxn in reaction.id.split('_') if rxn.startswith('rxn')]
    if rxnid:
        entry_names = mapping_reaction.loc[mapping_reaction['ModelSeed_ID'] == rxnid[0], 'entry_name'].values
        # Skip reactions without a mapping row or without gene entries instead of crashing.
        if len(entry_names) and isinstance(entry_names[0], str):
            gene_list_str = '( '+entry_names[0].replace(' ', ' or ')+' )'
            reaction.gene_reaction_rule = gene_list_str
    if i%100 == 0:
        print('%d / %d reactions added succesfully'%(i,len(new_model.reactions)))
    i+=1
print('%d / %d reactions added succesfully'%(len(new_model.reactions),len(new_model.reactions)))




cobra.io.write_sbml_model(new_model,filename=org_name+'_copyfrom_DriftModel_and_add_from_ModelSEED.xml')

 433 reactions begin to added 

rxn15116 added
rxn00536 added
rxn15494 added
rxn15364 added
rxn15249 added
rxn15694 added
rxn15314 added
rxn06120 added
rxn15989 added
rxn13974 added
rxn00175 added
rxn15298 added
rxn00250 added
rxn06109 added
rxn15271 added
rxn29919 added
rxn00778 added
rxn08647 added
rxn03643 added
rxn16622 added
rxn00211 added
rxn01042 added
rxn15081 added
rxn15270 added
rxn02100 added
rxn01008 added
rxn07578 added
rxn07576 added
rxn07577 added
rxn05336 added
rxn05342 added
rxn05340 added
rxn05338 added
rxn05341 added
rxn05337 added
rxn05339 added
rxn00533 added
rxn08766 added
rxn05322 added
rxn05326 added
rxn05325 added
rxn05327 added
rxn05324 added
rxn05323 added
rxn05328 added
rxn07455 added
rxn00947 added
rxn06237 added
rxn05994 added
rxn06238 added
rxn05830 added
rxn17757 added
rxn05996 added
rxn05871 added
rxn14029 added
rxn11642 added
rxn11643 added
rxn16584 added
rxn07335 added
rxn05004 added
rxn16585 added
rxn11156 added
rxn11994 added
rxn40277 added
rxn40276

## Add c0 reactions from the ModelSEED database

In [6]:
def add_ms_reaction(
    model,
    rxn_id,
    modelseed,
    compartment = 'c0',
    direction="forward",
):  # Xinli modified from modelseedpy.core.mseditorapi
    """Build a cobra reaction from a ModelSEED reaction and add it to ``model``.

    The new reaction gets the id ``rxn_id + '_' + compartment`` and the ModelSEED
    reaction name with the same suffix. Every key of the ModelSEED reaction's
    ``cstoichiometry`` is read as ``(compound id, compartment index)``: index 0
    becomes compartment 'c0', index 1 becomes 'e0'; any other index prints a
    warning and yields an empty compartment string.

    Args:
        model: object providing ``add_reactions(list)`` (a cobra model).
        rxn_id: ModelSEED reaction id.
        modelseed: object providing ``get_seed_reaction`` and ``get_seed_compound``.
        compartment: suffix used for the reaction id and name only.
        direction: 'forward' keeps the default bounds of ``cobra.Reaction``,
            'reversible' sets the lower bound to -1000, 'backward' sets the
            bounds to (-1000, 0).

    Raises:
        ValueError: if ``direction`` is not one of the three values above
            (previously such a value was silently treated as 'forward').
    """
    if direction not in ("forward", "reversible", "backward"):
        raise ValueError(
            "direction must be 'forward', 'reversible' or 'backward', got %r" % (direction,)
        )

    modelseed_reaction = modelseed.get_seed_reaction(rxn_id)
    reaction_stoich = modelseed_reaction.cstoichiometry
    cobra_reaction = cobra.Reaction(rxn_id+'_'+ compartment)
    cobra_reaction.name = str(modelseed_reaction.data["name"])+'_'+compartment

    compartment_by_index = {0: 'c0', 1: 'e0'}
    metabolites_to_add = {}
    for metabolite_key, stoich in reaction_stoich.items():
        compound_id = metabolite_key[0]  # avoids shadowing the builtin `id`
        compound = modelseed.get_seed_compound(compound_id).data
        compartment_string = compartment_by_index.get(int(metabolite_key[1]))
        if compartment_string is None:
            compartment_string = ''
            print(str(compound_id)+' compartment wrong')

        metabolites_to_add[
            cobra.Metabolite(
                compound_id+'_'+compartment_string, name=compound["name"]+'_'+compartment_string, compartment=compartment_string,
                # formula = compound['formula']
            )
        ] = stoich
    cobra_reaction.add_metabolites(metabolites_to_add)

    if direction == "reversible":
        cobra_reaction.lower_bound = -1000
    elif direction == "backward":
        cobra_reaction.lower_bound = -1000
        cobra_reaction.upper_bound = 0

    model.add_reactions([cobra_reaction])

In [9]:
import cobra
import pandas as pd

import os

# Read the KEGG -> ModelSEED mapping table produced earlier in the notebook.
mapping_reaction = pd.read_excel(os.path.join(org_name + '_model', org_name + '_mapping_reaction.xlsx'), index_col=0)
modelseed_id_list = list(mapping_reaction['ModelSeed_ID'].dropna(axis = 0, how = 'any'))  # reaction ID that need to be added


import modelseedpy
# Location of the local ModelSEEDDatabase checkout. It can be overridden with the
# MODELSEED_DB_PATH environment variable; the original hard-coded path is the default.
modelseed_path = os.environ.get('MODELSEED_DB_PATH', 'C:\\Users\\vickenlee\\ModelSEEDDatabase')
modelseed = modelseedpy.biochem.from_local(modelseed_path)

# reaction_type label in the mapping table -> direction understood by add_ms_reaction
direction_by_type = {'reversible': 'reversible', 'irreversible': 'forward'}

print('======================== \n %d reactions begin to added \n'%len(modelseed_id_list))

i,j = 0,0
for rxn_id in modelseed_id_list:
    try:
        reaction_type = mapping_reaction.loc[mapping_reaction['ModelSeed_ID'] == rxn_id, 'reaction_type'].values[0]
        # An unknown reaction_type raises KeyError here instead of silently
        # reusing the direction of the previous reaction.
        direction = direction_by_type[reaction_type]
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
        print(rxn_id+' added')
        i+=1
    except Exception as exc:
        print(rxn_id+' added wrong !!!')
        # report why, so that failures can be diagnosed
        print('      %s: %s' % (type(exc).__name__, exc))
        j+=1

print('\n %d / %d reactions added successfully \n ====================='%(i,i+j))

# Copy formula and charge from the ModelSEED compound onto each model metabolite.
for metabolite in new_model.metabolites:
    metaid = [meta for meta in metabolite.id.split('_') if meta.startswith('cpd')]
    if metaid:
        ms_metabolite = modelseed.get_seed_compound(metaid[0])
        if ms_metabolite is None:
            continue
        # The original `formula == formula` test is only False for NaN, i.e. it
        # was a missing-value check; spell that out.
        if pd.notna(ms_metabolite.formula):
            metabolite.formula = ms_metabolite.formula
        metabolite.charge = ms_metabolite.data['charge']

# add gene
print('======================== \n genes of %d reactions begin to added \n'%i)
#mapping_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', index_col=0)  
gene_prefix = org_name + ':'  # e.g. 'mbry:'; derived from org_name so other organisms work too
i=1
for reaction in new_model.reactions:
    rxnid = [rxn for rxn in reaction.id.split('_') if rxn.startswith('rxn')]
    if rxnid:
        entry_names = mapping_reaction.loc[mapping_reaction['ModelSeed_ID'] == rxnid[0], 'entry_name'].values
        # Skip reactions without a mapping row or without gene entries instead of crashing.
        if len(entry_names) and isinstance(entry_names[0], str):
            gene_list_str = '( '+entry_names[0].replace(' ', ' or ').replace(gene_prefix, '')+' )'
            reaction.gene_reaction_rule = gene_list_str
    if i%100 == 0:
        print('%d / %d reactions genes added succesfully'%(i,len(new_model.reactions)))
    i+=1
print('%d / %d reactions genes added succesfully'%(len(new_model.reactions),len(new_model.reactions)))



# cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_c0_from_modelseed.xml')

 920 reactions begin to added 

rxn15116 added
rxn00506 added
rxn00536 added
rxn00543 added
rxn00011 added
rxn02342 added
rxn01871 added
rxn00148 added
rxn00459 added
rxn01106 added
rxn00781 added
rxn00747 added
rxn15493 added
rxn15364 added
rxn00704 added
rxn02380 added
rxn01169 added
rxn01977 added
rxn15249 added
rxn15694 added
rxn01100 added
rxn15989 added
rxn00247 added
rxn00175 added
rxn00507 added
rxn15298 added
rxn00151 added
rxn00147 added
rxn00441 added
rxn02376 added
rxn01872 added
rxn00285 added
rxn00199 added
rxn01387 added
rxn00799 added
rxn01388 added
rxn00974 added
rxn00256 added
rxn00248 added
rxn06109 added
rxn00770 added
rxn01200 added
rxn00777 added
rxn01116 added
rxn15271 added
rxn01115 added
rxn01476 added
rxn01975 added
rxn29919 added
rxn00778 added
rxn08647 added
rxn01187 added
rxn11040 added
rxn00213 added
rxn00211 added
rxn06201 added
rxn15081 added
rxn15270 added
rxn01329 added
rxn00642 added
rxn00641 added
rxn01870 added
rxn02438 added
rxn02314 added
rxn00214

#### Check mass and charge balance

In [10]:
def check_balance(reaction,H_metabolite=None):
    """Report whether ``reaction`` is balanced, optionally correcting it with protons.

    ``reaction.check_mass_balance()`` is evaluated; an empty result means the
    reaction is balanced. Otherwise the imbalance and the reaction string are
    printed. When ``H_metabolite`` is given and the imbalance in 'H' equals the
    imbalance in 'charge', that many ``H_metabolite`` are added to the reaction
    with the opposite sign and the reaction is checked once more.

    Only one correction is attempted per call: the re-check runs without
    ``H_metabolite``, so a correction that does not remove the imbalance can no
    longer recurse without bound and keep adding protons.

    Args:
        reaction: object with ``id``, ``reaction``, ``check_mass_balance()`` and
            ``add_metabolites()`` (a cobra reaction).
        H_metabolite: metabolite used for the proton correction, or None to
            only report.

    Returns:
        1 if the reaction is balanced (possibly after the correction), else 0.
        0 is also returned when the check itself raises an exception.
    """
    try:
        feedback = reaction.check_mass_balance()
        if not feedback:
            return 1

        print(reaction.id + ': ' + str(feedback))
        print('      '+ reaction.reaction)
        if H_metabolite and 'H' in feedback and feedback.get('H') == feedback.get('charge'):
            reaction.add_metabolites({H_metabolite: -1*feedback.get('H')})
            print('      H is added')
            # re-check only; no second correction
            return check_balance(reaction)
        return 0
    except Exception:
        # `except Exception` (not a bare except) so KeyboardInterrupt still stops the run
        print(reaction.id + ': error')
        print('      '+ reaction.reaction)
        return 0



import cobra
import pandas as pd

import os

# Metabolite handed to check_balance() for the proton correction.
H_metabolite = new_model.metabolites.get_by_id('cpd00067_c0')

# Count the reactions that are balanced, letting check_balance() add H where it can.
i=0
for reaction in new_model.reactions:
    i=i+check_balance(reaction,H_metabolite)
print('After add H+, %d / %d reactions are balanced'%(i,len(new_model.reactions)))

# os.path.join keeps the output path valid outside Windows as well.
cobra.io.write_sbml_model(new_model,filename=os.path.join(org_name+'_model', org_name+'_add_c0_from_modelseed.xml'))

rxn15989_c0: {'H': -2.0}
      cpd00363_c0 + 2.0 cpd19499_c0 <=> cpd00071_c0 + 2.0 cpd19500_c0
rxn05929_c0: {'C': 6.0, 'H': 10.0, 'O': 5.0}
      cpd00001_c0 --> cpd00108_c0
rxn07579_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0, 'R': -1.0}
      cpd00004_c0 + cpd00067_c0 + cpd14940_c0 <=> cpd00003_c0 + cpd12458_c0
rxn07578_c0: {'charge': -2.0, 'C': 11.0, 'H': 21.0, 'O': 7.0, 'N': 2.0, 'P': 1.0}
      cpd14939_c0 <=> cpd00001_c0 + cpd14940_c0
rxn07576_c0: {'charge': -1.0}
      cpd00067_c0 + cpd11476_c0 + cpd11492_c0 --> cpd00011_c0 + cpd11493_c0 + cpd14938_c0
rxn07577_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0}
      cpd00005_c0 + cpd00067_c0 + cpd14938_c0 <=> cpd00006_c0 + cpd14939_c0
rxn05350_c0: {'charge': -1.0}
      cpd00067_c0 + cpd11472_c0 + cpd11492_c0 --> cpd00011_c0 + cpd11490_c0 + cpd11493_c0
rxn05336_c0: {'charge': -3.0}
      cpd00006_c0 + 2.0 cpd00067_c0 + cpd11481_c0 <=> cpd00005_c0 + cpd11485_c0
rxn05323_c0

#### Print unbalanced reactions

In [11]:
# Re-check every reaction without proton correction. For each reaction that is
# not balanced, print its ModelSEED query URL followed by the id, formula and
# charge of every metabolite taking part in it.
for rxn in new_model.reactions:
    if check_balance(rxn) != 0:
        continue
    print('           https://modelseed.org/solr/reactions/select?wt=json&q=id:'+rxn.id.replace('_c0',''))
    for met in rxn.metabolites:
        print('           '+met.id+'    '+ str(met.formula)+'    '+str(met.charge))

        #new_model.metabolites.get_by_id('C00042_c0').charge


rxn15989_c0: {'H': -2.0}
      cpd00363_c0 + 2.0 cpd19499_c0 <=> cpd00071_c0 + 2.0 cpd19500_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn15989
           cpd19499_c0    None    0
           cpd19500_c0    None    0
           cpd00363_c0    C2H6O    0
           cpd00071_c0    C2H4O    0
rxn05929_c0: {'C': 6.0, 'H': 10.0, 'O': 5.0}
      cpd00001_c0 --> cpd00108_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn05929
           cpd00108_c0    C6H12O6    0
           cpd00001_c0    H2O    0
rxn07579_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0, 'R': -1.0}
      cpd00004_c0 + cpd00067_c0 + cpd14940_c0 <=> cpd00003_c0 + cpd12458_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn07579
           cpd14940_c0    C29H54N2O8PRS    -2
           cpd12458_c0    C18H35OS    0
           cpd00004_c0    C21H27N7O14P2    -2
           cpd00067_c0    H    1
           cpd00003_c0    C21H26N7O14

In [13]:
# Display the summary of new_model.
new_model

0,1
Name,Methylocystis_bryophila
Memory address,2bc64471340
Number of metabolites,1017
Number of reactions,920
Number of genes,691
Number of groups,0
Objective expression,0
Compartments,c0


## Add the biomass and exchange (e0) reactions from the draft ("drift") model

In [12]:
import cobra

import os

# Load the ModelSEED model of the organism from "<org_name>_model".
#drift_model_path = org_name+'_model\\ModelSeed_model_DSMZ_21852.xml'
# os.path.join keeps the path valid outside Windows as well.
drift_model_path = os.path.join(org_name+'_model', 'ModelSeed_model_'+org_full_name+'.xml')
drift_model = cobra.io.read_sbml_model(drift_model_path)
drift_model


0,1
Name,Metabolic_model_Methylocystis_bryophila
Memory address,2bc6d97e6a0
Number of metabolites,1160
Number of reactions,1205
Number of genes,837
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"c0, e0"


In [14]:
import os

# Reload the c0 model written by the balance step, then add the biomass reaction
# (as objective) and all reactions whose id starts with 'EX' from drift_model.
model_dir = org_name+'_model'  # os.path.join keeps the paths valid outside Windows as well
new_model = cobra.io.read_sbml_model(os.path.join(model_dir, org_name+'_add_c0_from_modelseed.xml'))

# Reactions are copied before being added so that the originals stay attached
# to drift_model instead of being re-parented to new_model.
bioreaction = drift_model.reactions.get_by_id('bio1_biomass').copy()
new_model.add_reactions([bioreaction])
new_model.objective = new_model.problem.Objective(bioreaction.flux_expression, direction='max')
exreaction = [rea.copy() for rea in drift_model.reactions if rea.id.startswith('EX')]
new_model.add_reactions(exreaction)
cobra.io.write_sbml_model(new_model,filename=os.path.join(model_dir, org_name+'_add_from_modelseed.xml'))

No objective coefficients in model. Unclear what should be optimized


In [15]:
# Display the summary of new_model after the biomass and 'EX' reactions were added.
new_model

0,1
Name,Methylocystis_bryophila
Memory address,2bc6d97e460
Number of metabolites,1140
Number of reactions,1024
Number of genes,691
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"c0, e0"


## add reactions and metabolites note

https://narrative.kbase.us/#catalog/apps/kb_uploadmethods/import_file_as_fba_model_from_staging