In [None]:
# Install the third-party packages used by this workflow (one-time setup per environment).
pip install requests
pip install lxml
pip install pandas
pip install openpyxl
pip install cobra
pip install cobrakbase
# Windows only: first download the pyeda wheel from https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda,
# then install it with the command below (this wheel file name targets CPython 3.7, win_amd64).
# pip install pyeda-0.28.0-cp37-cp37m-win_amd64.whl
pip install modelseedpy


In [1]:
# Target organism: Methylomonas koyamae.
# org_name is the short prefix used to build folder and file names (e.g. org_name+'_model\\...').
# org_full_name names the genome folder (org_full_name+'_genome') and the cobra model.
org_name = 'mko'
org_full_name = 'Methylomonas_koyamae'

In [11]:
# Alternative target organism: Methylocystis bryophila.
# Running this cell rebinds org_name / org_full_name, so run only the cell for the
# organism being reconstructed (the last one executed wins).
org_name = 'mbry'
org_full_name = 'Methylocystis_bryophila'

# Build model from ModelSEED

## Automatically build the model from the genome with ModelSEED

In [3]:
import modelseedpy
import sys
from modelseedpy import KBaseMediaPkg
from modelseedpy import FBAHelper, MSBuilder, MSGenome
from modelseedpy import RastClient
import os
import cobra
# NOTE(review): HOME is overwritten right before importing cobrakbase -- presumably a
# workaround for cobrakbase expecting HOME to be set (e.g. on Windows); confirm.
os.environ["HOME"] = "1"
import cobrakbase

# Locate the protein FASTA inside the genome folder downloaded from NCBI:
# https://www.ncbi.nlm.nih.gov/data-hub/genome/GCA_027925445.1/
genome_dir = org_full_name+'_genome'
genome_path = None
for root, dirs, files in os.walk(genome_dir):
    if 'protein.faa' in files:
        # os.path.join keeps the path valid on non-Windows systems as well.
        # If several sub-folders contain protein.faa, the last one visited is used.
        genome_path = os.path.join(root, 'protein.faa')

# Fail loudly instead of hitting a NameError (or silently reusing a stale path
# left in the kernel) when the genome has not been downloaded.
if genome_path is None:
    raise FileNotFoundError("no 'protein.faa' found under '%s'" % genome_dir)

genome = MSGenome.from_fasta(genome_path,split=' ')



In [None]:
# Create a RastClient and call annotate_genome on the MSGenome loaded from protein.faa.
# NOTE(review): presumably this contacts the remote RAST service and annotates
# `genome` in place (the return value is not stored) -- confirm.
rast = RastClient()
rast.annotate_genome(genome)

In [6]:
import os

# Read the KBase auth token from the environment instead of embedding it in the
# notebook: a token saved in the file is exposed to everyone who can read it.
# The token that was previously hard-coded here should be revoked.
kbase_token = os.environ.get('KB_AUTH_TOKEN')
if not kbase_token:
    raise RuntimeError('Set the KB_AUTH_TOKEN environment variable to your KBase token before running this cell')

kbase_api = cobrakbase.KBaseAPI(kbase_token)
# Model template and the two media objects, fetched from KBase workspaces.
kb_template = kbase_api.get_from_ws("GramNegModelTemplateV4","NewKBaseModelTemplates")
media1 = kbase_api.get_from_ws('Carbon-D-Glucose', 'KBaseMedia')
media2 = kbase_api.get_from_ws('C-acetate', 'KBaseMedia')

In [None]:
import os

# Build the drift model for this genome from the KBase Gram-negative template
# and save it as SBML in the organism's model folder.
kb_template = kbase_api.get_from_ws("GramNegModelTemplateV4","NewKBaseModelTemplates")
driftmodel = MSBuilder.build_metabolic_model(org_full_name, genome, template = kb_template, allow_all_non_grp_reactions = True)
# The cell that copies reactions out of this model refers to it as `drift_model`;
# bind both names so that cell works on a fresh kernel.
drift_model = driftmodel

# os.path.join keeps the output path valid on non-Windows systems;
# makedirs avoids a failure when the model folder does not exist yet.
model_dir = org_name+'_model'
os.makedirs(model_dir, exist_ok=True)
cobra.io.write_sbml_model(driftmodel,filename=os.path.join(model_dir, org_name+'_drift_model_from_modelseed.xml'))

# Alternatively, this model can be built in KBase.

# Bare last expression so the notebook displays the model summary.
driftmodel

In [22]:
from cobrakbase.core.kbasefba.newmodeltemplate_builder import NewModelTemplateBuilder
from modelseedpy.helpers import get_template, get_classifier


def _load_built_template(template_name):
    """Fetch the named template dict with get_template and build it into a template object."""
    template_dict = get_template(template_name)
    return NewModelTemplateBuilder.from_dict(template_dict, None).build()


# Core template and Gram-negative template, both used by the base-model cell.
template_core = _load_built_template('template_core')
template_gramneg = _load_built_template('template_gram_neg')



In [25]:
# Build the base model from the genome with the Gram-negative template
# (RAST annotation is switched off for this build).
model_builder = MSBuilder(genome, template_gramneg)
base_model = model_builder.build(
    'model',
    '0',
    allow_all_non_grp_reactions=True,
    annotate_with_rast=False,
)

from modelseedpy.core.msbuilder import build_biomass, core_atp

# Create the 'bio2' reaction from the core template via build_biomass and add it to the model.
core_biomass_reaction = build_biomass('bio2', base_model, template_core, core_atp, '0')
base_model.add_reactions([core_biomass_reaction])

## Initialize new model

In [None]:
import cobra

# Start from an empty cobra model named after the organism; later cells add reactions to it.
new_model = cobra.Model(org_full_name)
# Bare last expression so the notebook displays the model summary table.
new_model

0,1
Name,Methylomonas_koyamae
Memory address,275915d1f40
Number of metabolites,0
Number of reactions,0
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,


## Map KEGG reaction IDs to ModelSEED IDs

In [None]:
import os

import pandas as pd

#org_name = 'mbry'
# os.path.join keeps the paths valid on non-Windows systems (the original
# concatenated a literal backslash into the file name).
model_dir = org_name+'_model'

# Organism-specific KEGG reaction table and the KEGG-ID -> ModelSEED-ID lookup table.
df_reaction = pd.read_excel(os.path.join(model_dir, org_name+'_clean_kegg_reaction.xlsx'), index_col=1)
titlename= ['entry_name','pathway_title','reaction_type','reaction_substrates','reaction_products']
df_keggID_modelseedID = pd.read_csv('reaction_keggID_modelseedID.txt', sep='\t')

# Left join keeps every KEGG reaction even without a ModelSEED match (ModelSeed_ID is
# then missing); when one KEGG ID has several matches only the first row is kept.
merged_reaction = pd.merge(df_reaction,df_keggID_modelseedID,how='left',on='Kegg_ID')
unique_reaction = merged_reaction.drop_duplicates(subset=['Kegg_ID'], keep='first')
mapping_reaction = unique_reaction[['ModelSeed_ID','Kegg_ID']+titlename]
mapping_reaction.to_excel(os.path.join(model_dir, org_name+'_mapping_reaction.xlsx'), sheet_name='Sheet1', header=True)

print('done!')

done!


## Add reactions from the drift model

In [None]:
import os

import cobra
import pandas as pd

# This cell needs `drift_model` (source model) and `new_model` (target model) to exist.
# Optional: un-comment to load both from disk instead of reusing kernel objects.
# org_name = 'mbry'
# org_full_name = 'Methylocystis_bryophila'
# drift_model_path = org_name+'_model\\ModelSeed_model_'+org_full_name+'.xml'
# drift_model = cobra.io.read_sbml_model(drift_model_path)
# new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_blank.xml')

mapping_reaction = pd.read_excel(os.path.join(org_name+'_model', org_name+'_mapping_reaction.xlsx'), index_col=0)
modelseed_id_list = list(mapping_reaction['ModelSeed_ID'].dropna(axis = 0, how = 'any'))  # reaction ID that need to be added

added_rxn_id_list = []
unadded_rxn_id_list = []
for rxn_id in modelseed_id_list:
    try:
        reaction = drift_model.reactions.get_by_id(rxn_id+'_c0')
    except KeyError:
        # Only "reaction not present in the drift model" is treated as unadded; any other
        # error (e.g. drift_model not defined) now surfaces instead of being swallowed.
        unadded_rxn_id_list.append(rxn_id)
        continue
    # Add a copy so the reaction is not re-bound away from drift_model.
    new_model.add_reactions([reaction.copy()])
    added_rxn_id_list.append(rxn_id)

# Optional: also copy the biomass reaction and make it the objective.
# bioreaction = drift_model.reactions.get_by_id('bio1_biomass')
# new_model.add_reactions([bioreaction])
# new_model.objective = new_model.problem.Objective(bioreaction.flux_expression, direction='max')

# The biomass reaction is not added by this cell, so the summary no longer claims it is.
print(str(len(added_rxn_id_list)) + ' / ' + str(len(modelseed_id_list)) + ' reactions are added, which are copied from drift model')


#cobra.io.write_sbml_model(new_model,filename=org_name+'_copyfrom_DriftModel.xml')

In [None]:
# Display the ModelSEED reaction IDs that were not found in the drift model.
unadded_rxn_id_list

['rxn40277',
 'rxn40276',
 'rxn40037',
 'rxn40273',
 'rxn40254',
 'rxn40065',
 'rxn39352',
 'rxn40102',
 'rxn40015',
 'rxn40016',
 'rxn40018',
 'rxn40019',
 'rxn40020',
 'rxn40014',
 'rxn39964',
 'rxn40041',
 'rxn40040']

### Add reactions not included in the drift model

In [None]:
import os

import cobra
import pandas as pd

import modelseedpy

# Local copy of the ModelSEED biochemistry database. Set the MODELSEED_DB_PATH
# environment variable to override; the default is the path used originally.
modelseed_path = os.environ.get('MODELSEED_DB_PATH', 'C:\\Users\\vickenlee\\ModelSEEDDatabase')
modelseed = modelseedpy.biochem.from_local(modelseed_path)


print('======================== \n %d reactions begin to added \n'%len(unadded_rxn_id_list))

# --- 1. Add the reactions that were missing from the drift model ---------------
# reaction_type value in the mapping table -> direction argument of add_ms_reaction.
direction_by_type = {'reversible': 'reversible', 'irreversible': 'forward'}

n_added, n_failed = 0, 0
for rxn_id in unadded_rxn_id_list:
    reaction_type = mapping_reaction.loc[mapping_reaction['ModelSeed_ID'] == rxn_id, 'reaction_type'].values[0]
    direction = direction_by_type.get(reaction_type)
    if direction is None:
        # Previously an unknown/missing type silently reused the direction of the
        # preceding reaction (or raised NameError on the first iteration).
        print(rxn_id+' skipped: unknown reaction_type %r' % (reaction_type,))
        n_failed += 1
        continue
    try:
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
    except Exception as err:
        # Keep going (best effort) but report why the reaction could not be added.
        print(rxn_id+' added wrong !!! (%s: %s)' % (type(err).__name__, err))
        n_failed += 1
    else:
        print(rxn_id+' added')
        n_added += 1

print('\n %d / %d reactions added successfully \n ====================='%(n_added,n_added+n_failed))

# --- 2. Copy formula and charge from ModelSEED onto the model metabolites -------
for metabolite in new_model.metabolites:
    metaid = [meta for meta in metabolite.id.split('_') if meta.startswith('cpd')]
    if not metaid:
        continue
    ms_metabolite = modelseed.get_seed_compound(metaid[0])
    if ms_metabolite is None:
        # NOTE(review): assumes get_seed_compound returns None for unknown ids -- confirm.
        continue
    # x == x is False only for NaN-like values; presumably this guards against a
    # missing formula, so the comparison is kept as written.
    if ms_metabolite.formula == ms_metabolite.formula:
        metabolite.formula = ms_metabolite.formula
    metabolite.charge = ms_metabolite.data['charge']

# --- 3. Attach gene-reaction rules ----------------------------------------------
# add gene
# Re-read the mapping table from the model folder, i.e. the same file the mapping
# cell writes (the original read org_name+'_mapping_reaction.xlsx' from the working
# directory, which did not match that location).
mapping_reaction = pd.read_excel(os.path.join(org_name+'_model', org_name+'_mapping_reaction.xlsx'), index_col=0)
for idx, reaction in enumerate(new_model.reactions, start=1):
    rxnid = [rxn for rxn in reaction.id.split('_') if rxn.startswith('rxn')]
    if rxnid:
        entry_name = mapping_reaction.loc[mapping_reaction['ModelSeed_ID'] == rxnid[0], 'entry_name'].values[0]
        # Space-separated gene entries become an OR rule: "a b" -> "( a or b )".
        reaction.gene_reaction_rule = '( '+entry_name.replace(' ', ' or ')+' )'
    if idx%100 == 0:
        print('%d / %d reactions added succesfully'%(idx,len(new_model.reactions)))
print('%d / %d reactions added succesfully'%(len(new_model.reactions),len(new_model.reactions)))




cobra.io.write_sbml_model(new_model,filename=org_name+'_copyfrom_DriftModel_and_add_from_ModelSEED.xml')

 433 reactions begin to added 

rxn15116 added
rxn00536 added
rxn15494 added
rxn15364 added
rxn15249 added
rxn15694 added
rxn15314 added
rxn06120 added
rxn15989 added
rxn13974 added
rxn00175 added
rxn15298 added
rxn00250 added
rxn06109 added
rxn15271 added
rxn29919 added
rxn00778 added
rxn08647 added
rxn03643 added
rxn16622 added
rxn00211 added
rxn01042 added
rxn15081 added
rxn15270 added
rxn02100 added
rxn01008 added
rxn07578 added
rxn07576 added
rxn07577 added
rxn05336 added
rxn05342 added
rxn05340 added
rxn05338 added
rxn05341 added
rxn05337 added
rxn05339 added
rxn00533 added
rxn08766 added
rxn05322 added
rxn05326 added
rxn05325 added
rxn05327 added
rxn05324 added
rxn05323 added
rxn05328 added
rxn07455 added
rxn00947 added
rxn06237 added
rxn05994 added
rxn06238 added
rxn05830 added
rxn17757 added
rxn05996 added
rxn05871 added
rxn14029 added
rxn11642 added
rxn11643 added
rxn16584 added
rxn07335 added
rxn05004 added
rxn16585 added
rxn11156 added
rxn11994 added
rxn40277 added
rxn40276